/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.blm.orc;

import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY;

import java.io.EOFException;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import com.google.common.collect.ComparisonChain;

class RecordReaderImpl implements RecordReader {
  private static final Log LOG =
      LogFactory.getLog(RecordReaderImpl.class);
  private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();

  private final FSDataInputStream file;
  private final long firstRow;
  private final List<StripeInformation> stripes =
      new ArrayList<StripeInformation>();
  private OrcProto.StripeFooter stripeFooter;
  private final long totalRowCount;
  private final CompressionCodec codec;
  private final List<OrcProto.Type> types;
  private final int bufferSize;
  private final boolean[] included;
  private final long rowIndexStride;
  private long rowInStripe = 0;
  private int currentStripe = -1;
  private long rowBaseInStripe = 0;
  private long rowCountInStripe = 0;
  private final Map<StreamName, InStream> streams =
      new HashMap<StreamName, InStream>();
  List<BufferChunk> bufferChunks = new ArrayList<BufferChunk>(0);
  private final TreeReader reader;
  private final OrcProto.RowIndex[] indexes;
  private final SearchArgument sarg;
  // the leaf predicates for the sarg
  private final List<PredicateLeaf> sargLeaves;
  // an array the same length as the sargLeaves that maps them to column ids
  private final int[] filterColumns;
  // an array about which row groups aren't skipped
  private boolean[] includedRowGroups = null;
  private final Configuration conf;
  private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool();
  private final ZeroCopyReaderShim zcr;

  // this is an implementation copied from ElasticByteBufferPool in hadoop-2,
  // which lacks a clear()/clean() operation
  public final static class ByteBufferAllocatorPool implements ByteBufferPoolShim {
    private static final class Key implements Comparable<Key> {
      private final int capacity;
      private final long insertionGeneration;

      Key(int capacity, long insertionGeneration) {
        this.capacity = capacity;
        this.insertionGeneration = insertionGeneration;
      }

      @Override
      public int compareTo(Key other) {
        return ComparisonChain.start().compare(capacity, other.capacity)
            .compare(insertionGeneration, other.insertionGeneration).result();
      }

      @Override
      public boolean equals(Object rhs) {
        if (rhs == null) {
          return false;
        }
        try {
          Key o = (Key) rhs;
          return (compareTo(o) == 0);
        } catch (ClassCastException e) {
          return false;
        }
      }

      @Override
      public int hashCode() {
        return new HashCodeBuilder().append(capacity).append(insertionGeneration)
            .toHashCode();
      }
    }

    private final TreeMap<Key, ByteBuffer> buffers =
        new TreeMap<Key, ByteBuffer>();
    private final TreeMap<Key, ByteBuffer> directBuffers =
        new TreeMap<Key, ByteBuffer>();
    private long currentGeneration = 0;

    private final TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
      return direct ? directBuffers : buffers;
    }

    public void clear() {
      buffers.clear();
      directBuffers.clear();
    }

    @Override
    public ByteBuffer getBuffer(boolean direct, int length) {
      TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
      Map.Entry<Key, ByteBuffer> entry = tree.ceilingEntry(new Key(length, 0));
      if (entry == null) {
        return direct ? ByteBuffer.allocateDirect(length)
            : ByteBuffer.allocate(length);
      }
      tree.remove(entry.getKey());
      return entry.getValue();
    }

    @Override
    public void putBuffer(ByteBuffer buffer) {
      TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
      while (true) {
        Key key = new Key(buffer.capacity(), currentGeneration++);
        if (!tree.containsKey(key)) {
          tree.put(key, buffer);
          return;
        }
        // Buffers are indexed by (capacity, generation).
        // If our key is not unique on the first try, we try again
      }
    }
  }

  /**
   * Given a list of column names, find the given column and return the index.
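   * For example (hypothetical values), findColumns(new String[]{"a", "b", "c"}, "b", 5)
   * matches at index 1 and returns 1 + 5 = 6; if no name matches, the result is -1.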
* @param columnNames the list of potential column names * @param columnName the column name to look for * @param rootColumn offset the result with the rootColumn * @return the column number or -1 if the column wasn't found */ static int findColumns(String[] columnNames, String columnName, int rootColumn) { for (int i = 0; i < columnNames.length; ++i) { if (columnName.equals(columnNames[i])) { return i + rootColumn; } } return -1; } /** * Find the mapping from predicate leaves to columns. * @param sargLeaves the search argument that we need to map * @param columnNames the names of the columns * @param rootColumn the offset of the top level row, which offsets the * result * @return an array mapping the sarg leaves to concrete column numbers */ static int[] mapSargColumns(List<PredicateLeaf> sargLeaves, String[] columnNames, int rootColumn) { int[] result = new int[sargLeaves.size()]; Arrays.fill(result, -1); for (int i = 0; i < result.length; ++i) { String colName = sargLeaves.get(i).getColumnName(); result[i] = findColumns(columnNames, colName, rootColumn); } return result; } RecordReaderImpl(List<StripeInformation> stripes, FileSystem fileSystem, Path path, Reader.Options options, List<OrcProto.Type> types, CompressionCodec codec, int bufferSize, long strideRate, Configuration conf) throws IOException { this.file = fileSystem.open(path); this.codec = codec; this.types = types; this.bufferSize = bufferSize; this.included = options.getInclude(); this.conf = conf; this.sarg = options.getSearchArgument(); if (sarg != null) { sargLeaves = sarg.getLeaves(); filterColumns = mapSargColumns(sargLeaves, options.getColumnNames(), 0); } else { sargLeaves = null; filterColumns = null; } long rows = 0; long skippedRows = 0; long offset = options.getOffset(); long maxOffset = options.getMaxOffset(); for (StripeInformation stripe : stripes) { long stripeStart = stripe.getOffset(); if (offset > stripeStart) { skippedRows += stripe.getNumberOfRows(); } else if (stripeStart < maxOffset) { this.stripes.add(stripe); rows += stripe.getNumberOfRows(); } } final boolean zeroCopy = (conf != null) && (HiveConf.getBoolVar(conf, HIVE_ORC_ZEROCOPY)); if (zeroCopy && (codec == null || ((codec instanceof DirectDecompressionCodec) && ((DirectDecompressionCodec) codec).isAvailable()))) { /* codec is null or is available */ this.zcr = ShimLoader.getHadoopShims().getZeroCopyReader(file, pool); } else { this.zcr = null; } firstRow = skippedRows; totalRowCount = rows; reader = createTreeReader(path, 0, types, included, conf); indexes = new OrcProto.RowIndex[types.size()]; rowIndexStride = strideRate; advanceToNextRow(0L); } private static final class PositionProviderImpl implements PositionProvider { private final OrcProto.RowIndexEntry entry; private int index = 0; PositionProviderImpl(OrcProto.RowIndexEntry entry) { this.entry = entry; } @Override public long getNext() { return entry.getPositions(index++); } } private abstract static class TreeReader { protected final Path path; protected final int columnId; private BitFieldReader present = null; protected boolean valuePresent = false; protected final Configuration conf; TreeReader(Path path, int columnId, Configuration conf) { this.path = path; this.columnId = columnId; this.conf = conf; } void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } IntegerReader 
 createIntegerReader(OrcProto.ColumnEncoding.Kind kind, InStream in,
        boolean signed) throws IOException {
      switch (kind) {
        case DIRECT_V2:
        case DICTIONARY_V2:
          return new RunLengthIntegerReaderV2(in, signed, conf);
        case DIRECT:
        case DICTIONARY:
          return new RunLengthIntegerReader(in, signed);
        default:
          throw new IllegalArgumentException("Unknown encoding " + kind);
      }
    }

    void startStripe(Map<StreamName, InStream> streams,
        List<OrcProto.ColumnEncoding> encoding) throws IOException {
      checkEncoding(encoding.get(columnId));
      InStream in = streams.get(new StreamName(columnId,
          OrcProto.Stream.Kind.PRESENT));
      if (in == null) {
        present = null;
        valuePresent = true;
      } else {
        present = new BitFieldReader(in, 1);
      }
    }

    /**
     * Seek to the given position.
     * @param index the indexes loaded from the file
     * @throws IOException
     */
    void seek(PositionProvider[] index) throws IOException {
      if (present != null) {
        present.seek(index[columnId]);
      }
    }

    protected long countNonNulls(long rows) throws IOException {
      if (present != null) {
        long result = 0;
        for (long c = 0; c < rows; ++c) {
          if (present.next() == 1) {
            result += 1;
          }
        }
        return result;
      } else {
        return rows;
      }
    }

    abstract void skipRows(long rows) throws IOException;

    Object next(Object previous) throws IOException {
      if (present != null) {
        valuePresent = present.next() == 1;
      }
      return previous;
    }

    /**
     * Populates the isNull vector array in the previousVector object based on
     * the present stream values. This function is called from all the child
     * readers, and they all set values based on the isNull field.
     * @param previousVector the ColumnVector object whose isNull value is populated
     * @param batchSize size of the column vector
     * @return the updated previousVector
     * @throws IOException
     */
    Object nextVector(Object previousVector, long batchSize) throws IOException {
      ColumnVector result = (ColumnVector) previousVector;
      if (present != null) {
        // Set noNulls and isNull vector of the ColumnVector based on
        // present stream
        result.noNulls = true;
        for (int i = 0; i < batchSize; i++) {
          result.isNull[i] = (present.next() != 1);
          if (result.noNulls && result.isNull[i]) {
            result.noNulls = false;
          }
        }
      } else {
        // There is no present stream; this means that all the values are
        // present.
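        // (Note: noNulls == true is the fast-path contract of ColumnVector:
        // it asserts that no isNull[i] in this batch is set, so vectorized
        // consumers may skip per-row null checks.)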
result.noNulls = true; for (int i = 0; i < batchSize; i++) { result.isNull[i] = false; } } return previousVector; } } private static class BooleanTreeReader extends TreeReader { private BitFieldReader reader = null; BooleanTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); reader = new BitFieldReader(streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA)), 1); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); reader.seek(index[columnId]); } @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } @Override Object next(Object previous) throws IOException { super.next(previous); BooleanWritable result = null; if (valuePresent) { if (previous == null) { result = new BooleanWritable(); } else { result = (BooleanWritable) previous; } result.set(reader.next() == 1); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries reader.nextVector(result, batchSize); return result; } } private static class ByteTreeReader extends TreeReader { private RunLengthByteReader reader = null; ByteTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); reader = new RunLengthByteReader(streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA))); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); reader.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); ByteWritable result = null; if (valuePresent) { if (previous == null) { result = new ByteWritable(); } else { result = (ByteWritable) previous; } result.set(reader.next()); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries reader.nextVector(result, batchSize); return result; } @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } } private static class ShortTreeReader extends TreeReader { private IntegerReader reader = null; ShortTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, 
OrcProto.Stream.Kind.DATA); reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); reader.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); ShortWritable result = null; if (valuePresent) { if (previous == null) { result = new ShortWritable(); } else { result = (ShortWritable) previous; } result.set((short) reader.next()); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries reader.nextVector(result, batchSize); return result; } @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } } private static class IntTreeReader extends TreeReader { private IntegerReader reader = null; IntTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); reader.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); IntWritable result = null; if (valuePresent) { if (previous == null) { result = new IntWritable(); } else { result = (IntWritable) previous; } result.set((int) reader.next()); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries reader.nextVector(result, batchSize); return result; } @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } } private static class LongTreeReader extends TreeReader { private IntegerReader reader = null; LongTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); reader = 
createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); reader.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); LongWritable result = null; if (valuePresent) { if (previous == null) { result = new LongWritable(); } else { result = (LongWritable) previous; } result.set(reader.next()); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries reader.nextVector(result, batchSize); return result; } @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } } private static class FloatTreeReader extends TreeReader { private InStream stream; private final SerializationUtils utils; FloatTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); this.utils = new SerializationUtils(); } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); stream.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); FloatWritable result = null; if (valuePresent) { if (previous == null) { result = new FloatWritable(); } else { result = (FloatWritable) previous; } result.set(utils.readFloat(stream)); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { DoubleColumnVector result = null; if (previousVector == null) { result = new DoubleColumnVector(); } else { result = (DoubleColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries for (int i = 0; i < batchSize; i++) { if (!result.isNull[i]) { result.vector[i] = utils.readFloat(stream); } else { // If the value is not present then set NaN result.vector[i] = Double.NaN; } } // Set isRepeating flag result.isRepeating = true; for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { if (result.vector[i] != result.vector[i + 1]) { result.isRepeating = false; } } return result; } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); for (int i = 0; i < items; ++i) { utils.readFloat(stream); } } } private static class DoubleTreeReader extends TreeReader { private InStream stream; private final SerializationUtils utils; DoubleTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); this.utils = new SerializationUtils(); } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); stream.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); 
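      // Like the other scalar readers, next() reuses the "previous" Writable
      // when one is supplied, so iterating over rows avoids a per-value
      // allocation.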
DoubleWritable result = null; if (valuePresent) { if (previous == null) { result = new DoubleWritable(); } else { result = (DoubleWritable) previous; } result.set(utils.readDouble(stream)); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { DoubleColumnVector result = null; if (previousVector == null) { result = new DoubleColumnVector(); } else { result = (DoubleColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries for (int i = 0; i < batchSize; i++) { if (!result.isNull[i]) { result.vector[i] = utils.readDouble(stream); } else { // If the value is not present then set NaN result.vector[i] = Double.NaN; } } // Set isRepeating flag result.isRepeating = true; for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { if (result.vector[i] != result.vector[i + 1]) { result.isRepeating = false; } } return result; } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); stream.skip(items * 8); } } private static class BinaryTreeReader extends TreeReader { protected InStream stream; protected IntegerReader lengths = null; protected final LongColumnVector scratchlcv; BinaryTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); scratchlcv = new LongColumnVector(); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); stream.seek(index[columnId]); lengths.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); BytesWritable result = null; if (valuePresent) { if (previous == null) { result = new BytesWritable(); } else { result = (BytesWritable) previous; } int len = (int) lengths.next(); result.setSize(len); int offset = 0; while (len > 0) { int written = stream.read(result.getBytes(), offset, len); if (written < 0) { throw new EOFException("Can't finish byte read from " + stream); } len -= written; offset += written; } } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { BytesColumnVector result = null; if (previousVector == null) { result = new BytesColumnVector(); } else { result = (BytesColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); return result; } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); long lengthToSkip = 0; for (int i = 0; i < items; ++i) { lengthToSkip += lengths.next(); } stream.skip(lengthToSkip); } } private static class TimestampTreeReader extends TreeReader { private IntegerReader data = null; private IntegerReader nanos = null; private 
final LongColumnVector nanoVector = new LongColumnVector(); TimestampTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); data = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA)), true); nanos = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), false); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); data.seek(index[columnId]); nanos.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); TimestampWritable result = null; if (valuePresent) { if (previous == null) { result = new TimestampWritable(); } else { result = (TimestampWritable) previous; } Timestamp ts = new Timestamp(0); long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) * WriterImpl.MILLIS_PER_SECOND; int newNanos = parseNanos(nanos.next()); // fix the rounding when we divided by 1000. if (millis >= 0) { millis += newNanos / 1000000; } else { millis -= newNanos / 1000000; } ts.setTime(millis); ts.setNanos(newNanos); result.set(ts); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } result.reset(); Object obj = null; for (int i = 0; i < batchSize; i++) { obj = next(obj); if (obj == null) { result.noNulls = false; result.isNull[i] = true; } else { TimestampWritable writable = (TimestampWritable) obj; Timestamp timestamp = writable.getTimestamp(); result.vector[i] = TimestampUtils.getTimeNanoSec(timestamp); } } return result; } private static int parseNanos(long serialized) { int zeros = 7 & (int) serialized; int result = (int) (serialized >>> 3); if (zeros != 0) { for (int i = 0; i <= zeros; ++i) { result *= 10; } } return result; } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); data.skip(items); nanos.skip(items); } } private static class DateTreeReader extends TreeReader { private IntegerReader reader = null; DateTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); 
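      // super.seek() repositions only this column's PRESENT bitstream; the
      // DATA stream is repositioned separately below.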
reader.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); DateWritable result = null; if (valuePresent) { if (previous == null) { result = new DateWritable(); } else { result = (DateWritable) previous; } result.set((int) reader.next()); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { LongColumnVector result = null; if (previousVector == null) { result = new LongColumnVector(); } else { result = (LongColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries reader.nextVector(result, batchSize); return result; } @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } } private static class DecimalTreeReader extends TreeReader { private InStream valueStream; private IntegerReader scaleStream = null; private LongColumnVector scratchScaleVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); private final int precision; private final int scale; DecimalTreeReader(Path path, int columnId, int precision, int scale, Configuration conf) { super(path, columnId, conf); this.precision = precision; this.scale = scale; } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); valueStream = streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA)); scaleStream = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); valueStream.seek(index[columnId]); scaleStream.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); HiveDecimalWritable result = null; if (valuePresent) { if (previous == null) { result = new HiveDecimalWritable(); } else { result = (HiveDecimalWritable) previous; } result.set(HiveDecimal.create(SerializationUtils.readBigInteger(valueStream), (int) scaleStream.next())); return HiveDecimalUtils.enforcePrecisionScale(result, precision, scale); } return null; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { DecimalColumnVector result = null; if (previousVector == null) { result = new DecimalColumnVector(precision, scale); } else { result = (DecimalColumnVector) previousVector; } // Save the reference for isNull in the scratch vector boolean[] scratchIsNull = scratchScaleVector.isNull; // Read present/isNull stream super.nextVector(result, batchSize); // Read value entries based on isNull entries if (result.isRepeating) { if (!result.isNull[0]) { BigInteger bInt = SerializationUtils.readBigInteger(valueStream); short scaleInData = (short) scaleStream.next(); HiveDecimal dec = HiveDecimal.create(bInt, scaleInData); dec = HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale); result.set(0, dec); } } else { // result vector has isNull values set, use the same to read scale vector. 
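          // Sharing the isNull array lets the scale reader skip positions that
          // are null in the result; the scratch vector's own isNull array is
          // restored once the batch has been read.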
scratchScaleVector.isNull = result.isNull; scaleStream.nextVector(scratchScaleVector, batchSize); for (int i = 0; i < batchSize; i++) { if (!result.isNull[i]) { BigInteger bInt = SerializationUtils.readBigInteger(valueStream); short scaleInData = (short) scratchScaleVector.vector[i]; HiveDecimal dec = HiveDecimal.create(bInt, scaleInData); dec = HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale); result.set(i, dec); } } } // Switch back the null vector. scratchScaleVector.isNull = scratchIsNull; return result; } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); for (int i = 0; i < items; i++) { SerializationUtils.readBigInteger(valueStream); } scaleStream.skip(items); } } /** * A tree reader that will read string columns. At the start of the * stripe, it creates an internal reader based on whether a direct or * dictionary encoding was used. */ private static class StringTreeReader extends TreeReader { private TreeReader reader; StringTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { reader.checkEncoding(encoding); } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { // For each stripe, checks the encoding and initializes the appropriate // reader switch (encodings.get(columnId).getKind()) { case DIRECT: case DIRECT_V2: reader = new StringDirectTreeReader(path, columnId, conf); break; case DICTIONARY: case DICTIONARY_V2: reader = new StringDictionaryTreeReader(path, columnId, conf); break; default: throw new IllegalArgumentException("Unsupported encoding " + encodings.get(columnId).getKind()); } reader.startStripe(streams, encodings); } @Override void seek(PositionProvider[] index) throws IOException { reader.seek(index); } @Override Object next(Object previous) throws IOException { return reader.next(previous); } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { return reader.nextVector(previousVector, batchSize); } @Override void skipRows(long items) throws IOException { reader.skipRows(items); } } // This class collects together very similar methods for reading an ORC vector of byte arrays and // creating the BytesColumnVector. // private static class BytesColumnVectorUtil { private static byte[] commonReadByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, BytesColumnVector result, long batchSize) throws IOException { // Read lengths scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here... lengths.nextVector(scratchlcv, batchSize); int totalLength = 0; if (!scratchlcv.isRepeating) { for (int i = 0; i < batchSize; i++) { if (!scratchlcv.isNull[i]) { totalLength += (int) scratchlcv.vector[i]; } } } else { if (!scratchlcv.isNull[0]) { totalLength = (int) (batchSize * scratchlcv.vector[0]); } } // Read all the strings for this batch byte[] allBytes = new byte[totalLength]; int offset = 0; int len = totalLength; while (len > 0) { int bytesRead = stream.read(allBytes, offset, len); if (bytesRead < 0) { throw new EOFException("Can't finish byte read from " + stream); } len -= bytesRead; offset += bytesRead; } return allBytes; } // This method has the common code for reading in bytes into a BytesColumnVector. 
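    // Call pattern, as used by the binary and string readers above:
    //   super.nextVector(result, batchSize);  // populate result.isNull first
    //   BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv,
    //       result, batchSize);               // bulk-read blob, setRef() slices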
public static void readOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, BytesColumnVector result, long batchSize) throws IOException { byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv, result, batchSize); // Too expensive to figure out 'repeating' by comparisons. result.isRepeating = false; int offset = 0; if (!scratchlcv.isRepeating) { for (int i = 0; i < batchSize; i++) { if (!scratchlcv.isNull[i]) { result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]); offset += scratchlcv.vector[i]; } else { result.setRef(i, allBytes, 0, 0); } } } else { for (int i = 0; i < batchSize; i++) { if (!scratchlcv.isNull[i]) { result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]); offset += scratchlcv.vector[0]; } else { result.setRef(i, allBytes, 0, 0); } } } } } /** * A reader for string columns that are direct encoded in the current * stripe. */ private static class StringDirectTreeReader extends TreeReader { private InStream stream; private IntegerReader lengths; private final LongColumnVector scratchlcv; StringDirectTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); scratchlcv = new LongColumnVector(); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT && encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); stream.seek(index[columnId]); lengths.seek(index[columnId]); } @Override Object next(Object previous) throws IOException { super.next(previous); Text result = null; if (valuePresent) { if (previous == null) { result = new Text(); } else { result = (Text) previous; } int len = (int) lengths.next(); int offset = 0; byte[] bytes = new byte[len]; while (len > 0) { int written = stream.read(bytes, offset, len); if (written < 0) { throw new EOFException("Can't finish byte read from " + stream); } len -= written; offset += written; } result.set(bytes); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { BytesColumnVector result = null; if (previousVector == null) { result = new BytesColumnVector(); } else { result = (BytesColumnVector) previousVector; } // Read present/isNull stream super.nextVector(result, batchSize); BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); return result; } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); long lengthToSkip = 0; for (int i = 0; i < items; ++i) { lengthToSkip += lengths.next(); } stream.skip(lengthToSkip); } } /** * A reader for string columns that are dictionary encoded in the current * stripe. 
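   * (Stream layout, as consumed in startStripe below: DICTIONARY_DATA holds
   * the concatenated bytes of all distinct values, LENGTH supplies per-entry
   * lengths from which dictionaryOffsets is rebuilt, and DATA carries one
   * dictionary index per row.)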
   */
  private static class StringDictionaryTreeReader extends TreeReader {
    private DynamicByteArray dictionaryBuffer;
    private int[] dictionaryOffsets;
    private IntegerReader reader;
    private byte[] dictionaryBufferInBytesCache = null;
    private final LongColumnVector scratchlcv;

    StringDictionaryTreeReader(Path path, int columnId, Configuration conf) {
      super(path, columnId, conf);
      scratchlcv = new LongColumnVector();
    }

    @Override
    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY &&
          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
        throw new IOException("Unknown encoding " + encoding + " in column " +
            columnId + " of " + path);
      }
    }

    @Override
    void startStripe(Map<StreamName, InStream> streams,
        List<OrcProto.ColumnEncoding> encodings) throws IOException {
      super.startStripe(streams, encodings);

      // read the dictionary blob
      int dictionarySize = encodings.get(columnId).getDictionarySize();
      StreamName name = new StreamName(columnId,
          OrcProto.Stream.Kind.DICTIONARY_DATA);
      InStream in = streams.get(name);
      if (in.available() > 0) {
        dictionaryBuffer = new DynamicByteArray(64, in.available());
        dictionaryBuffer.readAll(in);
        // Since it's the start of a stripe, invalidate the cache.
        dictionaryBufferInBytesCache = null;
      } else {
        dictionaryBuffer = null;
      }
      in.close();

      // read the lengths
      name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
      in = streams.get(name);
      IntegerReader lenReader = createIntegerReader(
          encodings.get(columnId).getKind(), in, false);
      int offset = 0;
      if (dictionaryOffsets == null ||
          dictionaryOffsets.length < dictionarySize + 1) {
        dictionaryOffsets = new int[dictionarySize + 1];
      }
      for (int i = 0; i < dictionarySize; ++i) {
        dictionaryOffsets[i] = offset;
        offset += (int) lenReader.next();
      }
      dictionaryOffsets[dictionarySize] = offset;
      in.close();

      // set up the row reader
      name = new StreamName(columnId, OrcProto.Stream.Kind.DATA);
      reader = createIntegerReader(encodings.get(columnId).getKind(),
          streams.get(name), false);
    }

    @Override
    void seek(PositionProvider[] index) throws IOException {
      super.seek(index);
      reader.seek(index[columnId]);
    }

    @Override
    Object next(Object previous) throws IOException {
      super.next(previous);
      Text result = null;
      if (valuePresent) {
        int entry = (int) reader.next();
        if (previous == null) {
          result = new Text();
        } else {
          result = (Text) previous;
        }
        int offset = dictionaryOffsets[entry];
        int length = getDictionaryEntryLength(entry, offset);
        // If the column is just empty strings, the size will be zero,
        // so the buffer will be null, in that case just return result
        // as it will default to empty
        if (dictionaryBuffer != null) {
          dictionaryBuffer.setText(result, offset, length);
        } else {
          result.clear();
        }
      }
      return result;
    }

    @Override
    Object nextVector(Object previousVector, long batchSize) throws IOException {
      BytesColumnVector result = null;
      int offset = 0, length = 0;
      if (previousVector == null) {
        result = new BytesColumnVector();
      } else {
        result = (BytesColumnVector) previousVector;
      }

      // Read present/isNull stream
      super.nextVector(result, batchSize);

      if (dictionaryBuffer != null) {
        // Load dictionaryBuffer into cache.
        if (dictionaryBufferInBytesCache == null) {
          dictionaryBufferInBytesCache = dictionaryBuffer.get();
        }
        // Read string offsets
        scratchlcv.isNull = result.isNull;
        reader.nextVector(scratchlcv, batchSize);
        if (!scratchlcv.isRepeating) {
          // The vector has non-repeating strings. Iterate through the batch
          // and set strings one by one
          for (int i = 0; i < batchSize; i++) {
            if (!scratchlcv.isNull[i]) {
              offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
              length = getDictionaryEntryLength((int) scratchlcv.vector[i],
                  offset);
              result.setRef(i, dictionaryBufferInBytesCache, offset, length);
            } else {
              // If the value is null then set offset and length to zero (null string)
              result.setRef(i, dictionaryBufferInBytesCache, 0, 0);
            }
          }
        } else {
          // If the value is repeating then just set the first value in the
          // vector and set the isRepeating flag to true. No need to iterate
          // through and set all the elements to the same value
          offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
          length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
          result.setRef(0, dictionaryBufferInBytesCache, offset, length);
        }
        result.isRepeating = scratchlcv.isRepeating;
      } else {
        // Entire stripe contains null strings.
        result.isRepeating = true;
        result.noNulls = false;
        result.isNull[0] = true;
        result.setRef(0, "".getBytes(), 0, 0);
      }
      return result;
    }

    int getDictionaryEntryLength(int entry, int offset) {
      int length = 0;
      // if it isn't the last entry, subtract the offsets, otherwise use
      // the buffer length.
      if (entry < dictionaryOffsets.length - 1) {
        length = dictionaryOffsets[entry + 1] - offset;
      } else {
        length = dictionaryBuffer.size() - offset;
      }
      return length;
    }

    @Override
    void skipRows(long items) throws IOException {
      reader.skip(countNonNulls(items));
    }
  }

  private static class CharTreeReader extends StringTreeReader {
    int maxLength;

    CharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
      super(path, columnId, conf);
      this.maxLength = maxLength;
    }

    @Override
    Object next(Object previous) throws IOException {
      HiveCharWritable result = null;
      if (previous == null) {
        result = new HiveCharWritable();
      } else {
        result = (HiveCharWritable) previous;
      }
      // Use the string reader implementation to populate the internal Text value
      Object textVal = super.next(result.getTextValue());
      if (textVal == null) {
        return null;
      }
      // result should now hold the value that was read in.
      // enforce char length
      result.enforceMaxLength(maxLength);
      return result;
    }

    @Override
    Object nextVector(Object previousVector, long batchSize) throws IOException {
      // Get the vector of strings from StringTreeReader, then make a 2nd pass to
      // adjust down the length (right trim and truncate) if necessary.
BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); int adjustedDownLen; if (result.isRepeating) { if (result.noNulls || !result.isNull[0]) { adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[0], result.start[0], result.length[0], maxLength); if (adjustedDownLen < result.length[0]) { result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); } } } else { if (result.noNulls) { for (int i = 0; i < batchSize; i++) { adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength); if (adjustedDownLen < result.length[i]) { result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); } } } else { for (int i = 0; i < batchSize; i++) { if (!result.isNull[i]) { adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength); if (adjustedDownLen < result.length[i]) { result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); } } } } } return result; } } private static class VarcharTreeReader extends StringTreeReader { int maxLength; VarcharTreeReader(Path path, int columnId, int maxLength, Configuration conf) { super(path, columnId, conf); this.maxLength = maxLength; } @Override Object next(Object previous) throws IOException { HiveVarcharWritable result = null; if (previous == null) { result = new HiveVarcharWritable(); } else { result = (HiveVarcharWritable) previous; } // Use the string reader implementation to populate the internal Text value Object textVal = super.next(result.getTextValue()); if (textVal == null) { return null; } // result should now hold the value that was read in. // enforce varchar length result.enforceMaxLength(maxLength); return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { // Get the vector of strings from StringTreeReader, then make a 2nd pass to // adjust down the length (truncate) if necessary. 
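      // Note: unlike CharTreeReader above, which right-trims trailing spaces
      // before truncating, varchar values are only truncated to maxLength.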
BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); int adjustedDownLen; if (result.isRepeating) { if (result.noNulls || !result.isNull[0]) { adjustedDownLen = StringExpr.truncate(result.vector[0], result.start[0], result.length[0], maxLength); if (adjustedDownLen < result.length[0]) { result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); } } } else { if (result.noNulls) { for (int i = 0; i < batchSize; i++) { adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength); if (adjustedDownLen < result.length[i]) { result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); } } } else { for (int i = 0; i < batchSize; i++) { if (!result.isNull[i]) { adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength); if (adjustedDownLen < result.length[i]) { result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); } } } } } return result; } } private static class StructTreeReader extends TreeReader { private final TreeReader[] fields; private final String[] fieldNames; StructTreeReader(Path path, int columnId, List<OrcProto.Type> types, boolean[] included, Configuration conf) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int fieldCount = type.getFieldNamesCount(); this.fields = new TreeReader[fieldCount]; this.fieldNames = new String[fieldCount]; for (int i = 0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { this.fields[i] = createTreeReader(path, subtype, types, included, conf); } this.fieldNames[i] = type.getFieldNames(i); } } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); for (TreeReader kid : fields) { if (kid != null) { kid.seek(index); } } } @Override Object next(Object previous) throws IOException { super.next(previous); OrcStruct result = null; if (valuePresent) { if (previous == null) { result = new OrcStruct(fields.length); } else { result = (OrcStruct) previous; // If the input format was initialized with a file with a // different number of fields, the number of fields needs to // be updated to the correct number if (result.getNumFields() != fields.length) { result.setNumFields(fields.length); } } for (int i = 0; i < fields.length; ++i) { if (fields[i] != null) { result.setFieldValue(i, fields[i].next(result.getFieldValue(i))); } } } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { ColumnVector[] result = null; if (previousVector == null) { result = new ColumnVector[fields.length]; } else { result = (ColumnVector[]) previousVector; } // Read all the members of struct as column vectors for (int i = 0; i < fields.length; i++) { if (fields[i] != null) { if (result[i] == null) { result[i] = (ColumnVector) fields[i].nextVector(null, batchSize); } else { fields[i].nextVector(result[i], batchSize); } } } return result; } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); for (TreeReader field : fields) { if (field != null) { field.startStripe(streams, encodings); } } } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); for (TreeReader field : fields) { if (field != null) { field.skipRows(items); } } } } private static class UnionTreeReader extends TreeReader { private final TreeReader[] fields; 
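    // For a union column, the DATA stream is one byte per value: the tag of
    // the child type that holds that row. The matching child reader then
    // supplies the actual value.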
private RunLengthByteReader tags; UnionTreeReader(Path path, int columnId, List<OrcProto.Type> types, boolean[] included, Configuration conf) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int fieldCount = type.getSubtypesCount(); this.fields = new TreeReader[fieldCount]; for (int i = 0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { this.fields[i] = createTreeReader(path, subtype, types, included, conf); } } } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); tags.seek(index[columnId]); for (TreeReader kid : fields) { kid.seek(index); } } @Override Object next(Object previous) throws IOException { super.next(previous); OrcUnion result = null; if (valuePresent) { if (previous == null) { result = new OrcUnion(); } else { result = (OrcUnion) previous; } byte tag = tags.next(); Object previousVal = result.getObject(); result.set(tag, fields[tag].next(tag == result.getTag() ? previousVal : null)); } return result; } @Override Object nextVector(Object previousVector, long batchSize) throws IOException { throw new UnsupportedOperationException("NextVector is not supported operation for Union type"); } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); tags = new RunLengthByteReader(streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA))); for (TreeReader field : fields) { if (field != null) { field.startStripe(streams, encodings); } } } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); long[] counts = new long[fields.length]; for (int i = 0; i < items; ++i) { counts[tags.next()] += 1; } for (int i = 0; i < counts.length; ++i) { fields[i].skipRows(counts[i]); } } } private static class ListTreeReader extends TreeReader { private final TreeReader elementReader; private IntegerReader lengths = null; ListTreeReader(Path path, int columnId, List<OrcProto.Type> types, boolean[] included, Configuration conf) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); elementReader = createTreeReader(path, type.getSubtypes(0), types, included, conf); } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); lengths.seek(index[columnId]); elementReader.seek(index); } @Override @SuppressWarnings("unchecked") Object next(Object previous) throws IOException { super.next(previous); List<Object> result = null; if (valuePresent) { if (previous == null) { result = new ArrayList<Object>(); } else { result = (ArrayList<Object>) previous; } int prevLength = result.size(); int length = (int) lengths.next(); // extend the list to the new length for (int i = prevLength; i < length; ++i) { result.add(null); } // read the new elements into the array for (int i = 0; i < length; i++) { result.set(i, elementReader.next(i < prevLength ? 
result.get(i) : null)); } // remove any extra elements for (int i = prevLength - 1; i >= length; --i) { result.remove(i); } } return result; } @Override Object nextVector(Object previous, long batchSize) throws IOException { throw new UnsupportedOperationException("NextVector is not supported operation for List type"); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false); if (elementReader != null) { elementReader.startStripe(streams, encodings); } } @Override void skipRows(long items) throws IOException { items = countNonNulls(items); long childSkip = 0; for (long i = 0; i < items; ++i) { childSkip += lengths.next(); } elementReader.skipRows(childSkip); } } private static class MapTreeReader extends TreeReader { private final TreeReader keyReader; private final TreeReader valueReader; private IntegerReader lengths = null; MapTreeReader(Path path, int columnId, List<OrcProto.Type> types, boolean[] included, Configuration conf) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int keyColumn = type.getSubtypes(0); int valueColumn = type.getSubtypes(1); if (included == null || included[keyColumn]) { keyReader = createTreeReader(path, keyColumn, types, included, conf); } else { keyReader = null; } if (included == null || included[valueColumn]) { valueReader = createTreeReader(path, valueColumn, types, included, conf); } else { valueReader = null; } } @Override void seek(PositionProvider[] index) throws IOException { super.seek(index); lengths.seek(index[columnId]); keyReader.seek(index); valueReader.seek(index); } @Override @SuppressWarnings("unchecked") Object next(Object previous) throws IOException { super.next(previous); Map<Object, Object> result = null; if (valuePresent) { if (previous == null) { result = new HashMap<Object, Object>(); } else { result = (HashMap<Object, Object>) previous; } // for now just clear and create new objects result.clear(); int length = (int) lengths.next(); // read the new elements into the array for (int i = 0; i < length; i++) { result.put(keyReader.next(null), valueReader.next(null)); } } return result; } @Override Object nextVector(Object previous, long batchSize) throws IOException { throw new UnsupportedOperationException("NextVector is not supported operation for Map type"); } @Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } } @Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false); if (keyReader != null) { keyReader.startStripe(streams, 
private static class MapTreeReader extends TreeReader {
private final TreeReader keyReader; private final TreeReader valueReader; private IntegerReader lengths = null;
MapTreeReader(Path path, int columnId, List<OrcProto.Type> types, boolean[] included, Configuration conf) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int keyColumn = type.getSubtypes(0); int valueColumn = type.getSubtypes(1); if (included == null || included[keyColumn]) { keyReader = createTreeReader(path, keyColumn, types, included, conf); } else { keyReader = null; } if (included == null || included[valueColumn]) { valueReader = createTreeReader(path, valueColumn, types, included, conf); } else { valueReader = null; } }
@Override void seek(PositionProvider[] index) throws IOException { super.seek(index); lengths.seek(index[columnId]); keyReader.seek(index); valueReader.seek(index); }
@Override @SuppressWarnings("unchecked") Object next(Object previous) throws IOException { super.next(previous); Map<Object, Object> result = null; if (valuePresent) { if (previous == null) { result = new HashMap<Object, Object>(); } else { result = (HashMap<Object, Object>) previous; } // for now just clear and create new objects result.clear(); int length = (int) lengths.next(); // read the new elements into the array for (int i = 0; i < length; i++) { result.put(keyReader.next(null), valueReader.next(null)); } } return result; }
@Override Object nextVector(Object previous, long batchSize) throws IOException { throw new UnsupportedOperationException("NextVector is not a supported operation for the Map type"); }
@Override void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { throw new IOException("Unknown encoding " + encoding + " in column " + columnId + " of " + path); } }
@Override void startStripe(Map<StreamName, InStream> streams, List<OrcProto.ColumnEncoding> encodings) throws IOException { super.startStripe(streams, encodings); lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false); if (keyReader != null) { keyReader.startStripe(streams, encodings); } if (valueReader != null) { valueReader.startStripe(streams, encodings); } }
@Override void skipRows(long items) throws IOException { items = countNonNulls(items); long childSkip = 0; for (long i = 0; i < items; ++i) { childSkip += lengths.next(); } keyReader.skipRows(childSkip); valueReader.skipRows(childSkip); } }
private static TreeReader createTreeReader(Path path, int columnId, List<OrcProto.Type> types, boolean[] included, Configuration conf) throws IOException { OrcProto.Type type = types.get(columnId); switch (type.getKind()) { case BOOLEAN: return new BooleanTreeReader(path, columnId, conf); case BYTE: return new ByteTreeReader(path, columnId, conf); case DOUBLE: return new DoubleTreeReader(path, columnId, conf); case FLOAT: return new FloatTreeReader(path, columnId, conf); case SHORT: return new ShortTreeReader(path, columnId, conf); case INT: return new IntTreeReader(path, columnId, conf); case LONG: return new LongTreeReader(path, columnId, conf); case STRING: return new StringTreeReader(path, columnId, conf); case CHAR: if (!type.hasMaximumLength()) { throw new IllegalArgumentException("ORC char type has no length specified"); } return new CharTreeReader(path, columnId, type.getMaximumLength(), conf); case VARCHAR: if (!type.hasMaximumLength()) { throw new IllegalArgumentException("ORC varchar type has no length specified"); } return new VarcharTreeReader(path, columnId, type.getMaximumLength(), conf); case BINARY: return new BinaryTreeReader(path, columnId, conf); case TIMESTAMP: return new TimestampTreeReader(path, columnId, conf); case DATE: return new DateTreeReader(path, columnId, conf); case DECIMAL: int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION; int scale = type.hasScale() ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE; return new DecimalTreeReader(path, columnId, precision, scale, conf); case STRUCT: return new StructTreeReader(path, columnId, types, included, conf); case LIST: return new ListTreeReader(path, columnId, types, included, conf); case MAP: return new MapTreeReader(path, columnId, types, included, conf); case UNION: return new UnionTreeReader(path, columnId, types, included, conf); default: throw new IllegalArgumentException("Unsupported type " + type.getKind()); } }
OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException { long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(); int tailLength = (int) stripe.getFooterLength(); // read the footer ByteBuffer tailBuf = ByteBuffer.allocate(tailLength); file.seek(offset); file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength); return OrcProto.StripeFooter.parseFrom(InStream.create("footer", new ByteBuffer[] { tailBuf }, new long[] { 0 }, tailLength, codec, bufferSize)); }
static enum Location { BEFORE, MIN, MIDDLE, MAX, AFTER }
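// Worked example (illustrative only): compareToRange below buckets a point
// against [min, max]. With min = 10 and max = 20:
//   point = 5  -> Location.BEFORE
//   point = 10 -> Location.MIN
//   point = 15 -> Location.MIDDLE
//   point = 20 -> Location.MAX
//   point = 25 -> Location.AFTER
// These five buckets are what the predicate evaluation further down maps to
// truth values.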
/** * Given a point and min and max, determine if the point is before, at the min, in the middle, at the max, or after the range. * @param point the point to test * @param min the minimum point * @param max the maximum point * @param <T> the type of the comparison * @return the location of the point */
static <T> Location compareToRange(Comparable<T> point, T min, T max) { int minCompare = point.compareTo(min); if (minCompare < 0) { return Location.BEFORE; } else if (minCompare == 0) { return Location.MIN; } int maxCompare = point.compareTo(max); if (maxCompare > 0) { return Location.AFTER; } else if (maxCompare == 0) { return Location.MAX; } return Location.MIDDLE; }
/** * Get the maximum value out of an index entry. * @param index the index entry * @return the object for the maximum value or null if there isn't one */
static Object getMax(ColumnStatistics index) { if (index instanceof IntegerColumnStatistics) { return ((IntegerColumnStatistics) index).getMaximum(); } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMaximum(); } else if (index instanceof StringColumnStatistics) { return ((StringColumnStatistics) index).getMaximum(); } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMaximum(); } else if (index instanceof DecimalColumnStatistics) { return ((DecimalColumnStatistics) index).getMaximum(); } else if (index instanceof TimestampColumnStatistics) { return ((TimestampColumnStatistics) index).getMaximum(); } else if (index instanceof BooleanColumnStatistics) { if (((BooleanColumnStatistics) index).getTrueCount() != 0) { return "true"; } else { return "false"; } } else { return null; } }
/** * Get the minimum value out of an index entry. * @param index the index entry * @return the object for the minimum value or null if there isn't one */
static Object getMin(ColumnStatistics index) { if (index instanceof IntegerColumnStatistics) { return ((IntegerColumnStatistics) index).getMinimum(); } else if (index instanceof DoubleColumnStatistics) { return ((DoubleColumnStatistics) index).getMinimum(); } else if (index instanceof StringColumnStatistics) { return ((StringColumnStatistics) index).getMinimum(); } else if (index instanceof DateColumnStatistics) { return ((DateColumnStatistics) index).getMinimum(); } else if (index instanceof DecimalColumnStatistics) { return ((DecimalColumnStatistics) index).getMinimum(); } else if (index instanceof TimestampColumnStatistics) { return ((TimestampColumnStatistics) index).getMinimum(); } else if (index instanceof BooleanColumnStatistics) { if (((BooleanColumnStatistics) index).getFalseCount() != 0) { return "false"; } else { return "true"; } } else { return null; } }
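// Illustrative note (not part of the original source): boolean columns carry
// no explicit minimum/maximum statistics, so getMax/getMin above synthesize
// them from the true/false counts. A column whose trueCount is 0 yields
// min == max == "false", which lets the range checks below prove that a
// predicate such as col = true matches no rows in the group.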
/** * Evaluate a predicate with respect to the statistics from the column that is referenced in the predicate. * @param index the statistics for the column mentioned in the predicate * @param predicate the leaf predicate we need to evaluate * @return the set of truth values that may be returned for the given predicate */
static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index, PredicateLeaf predicate) { ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index); Object minValue = getMin(cs); Object maxValue = getMax(cs); return evaluatePredicateRange(predicate, minValue, maxValue); }
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, Object max) { // if we didn't have any values, everything must have been null if (min == null) { if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) { return TruthValue.YES; } else { return TruthValue.NULL; } } Location loc; try { // The predicate object and the stats object can be one of the following base types: // LONG, DOUBLE, STRING, DATE, DECIMAL. // Of these, DATE is not implicitly convertible to the other types, while the // rest are mutually convertible. Where DATE cannot be converted, the stats // object is converted to text and the comparison is performed on strings. // When STRINGs are converted to other base types, a NumberFormatException // can occur, in which case TruthValue.YES_NO_NULL is returned. Object baseObj = predicate.getLiteral(); Object minValue = getConvertedStatsObj(min, baseObj); Object maxValue = getConvertedStatsObj(max, baseObj); Object predObj = getBaseObjectForComparison(baseObj, minValue); switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.BEFORE || loc == Location.AFTER) { return TruthValue.NO; } else { return TruthValue.YES_NO; } case EQUALS: loc = compareToRange((Comparable) predObj, minValue, maxValue); if (minValue.equals(maxValue) && loc == Location.MIN) { return TruthValue.YES_NULL; } else if (loc == Location.BEFORE || loc == Location.AFTER) { return TruthValue.NO_NULL; } else { return TruthValue.YES_NO_NULL; } case LESS_THAN: loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.AFTER) { return TruthValue.YES_NULL; } else if (loc == Location.BEFORE || loc == Location.MIN) { return TruthValue.NO_NULL; } else { return TruthValue.YES_NO_NULL; } case LESS_THAN_EQUALS: loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.AFTER || loc == Location.MAX) { return TruthValue.YES_NULL; } else if (loc == Location.BEFORE) { return TruthValue.NO_NULL; } else { return TruthValue.YES_NO_NULL; } case IN: if (minValue.equals(maxValue)) { // for a single value, look through to see if that value is in the set for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(arg, minValue); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN) { return TruthValue.YES_NULL; } } return TruthValue.NO_NULL; } else { // are all of the values outside of the range?
for (Object arg : predicate.getLiteralList()) { predObj = getBaseObjectForComparison(arg, minValue); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { return TruthValue.YES_NO_NULL; } } return TruthValue.NO_NULL; } case BETWEEN: List<Object> args = predicate.getLiteralList(); Object predObj1 = getBaseObjectForComparison(args.get(0), minValue); loc = compareToRange((Comparable) predObj1, minValue, maxValue); if (loc == Location.BEFORE || loc == Location.MIN) { Object predObj2 = getBaseObjectForComparison(args.get(1), minValue); Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue); if (loc2 == Location.AFTER || loc2 == Location.MAX) { return TruthValue.YES_NULL; } else if (loc2 == Location.BEFORE) { return TruthValue.NO_NULL; } else { return TruthValue.YES_NO_NULL; } } else if (loc == Location.AFTER) { return TruthValue.NO_NULL; } else { return TruthValue.YES_NO_NULL; } case IS_NULL: return TruthValue.YES_NO; default: return TruthValue.YES_NO_NULL; } // if the conversion failed, return the default YES_NO_NULL truth value } catch (NumberFormatException nfe) { return TruthValue.YES_NO_NULL; } }
private static Object getBaseObjectForComparison(Object predObj, Object statsObj) { if (predObj != null) { if (predObj instanceof ExprNodeConstantDesc) { predObj = ((ExprNodeConstantDesc) predObj).getValue(); } // following are implicitly convertible if (statsObj instanceof Long) { if (predObj instanceof Double) { return ((Double) predObj).longValue(); } else if (predObj instanceof HiveDecimal) { return ((HiveDecimal) predObj).longValue(); } else if (predObj instanceof String) { return Long.valueOf(predObj.toString()); } } else if (statsObj instanceof Double) { if (predObj instanceof Long) { return ((Long) predObj).doubleValue(); } else if (predObj instanceof HiveDecimal) { return ((HiveDecimal) predObj).doubleValue(); } else if (predObj instanceof String) { return Double.valueOf(predObj.toString()); } } else if (statsObj instanceof String) { return predObj.toString(); } else if (statsObj instanceof HiveDecimal) { if (predObj instanceof Long) { return HiveDecimal.create(((Long) predObj)); } else if (predObj instanceof Double) { return HiveDecimal.create(predObj.toString()); } else if (predObj instanceof String) { return HiveDecimal.create(predObj.toString()); } else if (predObj instanceof BigDecimal) { return HiveDecimal.create((BigDecimal) predObj); } } } return predObj; }
private static Object getConvertedStatsObj(Object statsObj, Object predObj) { // converting between date and other types is not implicit, so convert to text for comparison if (((predObj instanceof DateWritable) && !(statsObj instanceof DateWritable)) || ((statsObj instanceof DateWritable) && !(predObj instanceof DateWritable))) { return StringUtils.stripEnd(statsObj.toString(), null); } if (statsObj instanceof String) { return StringUtils.stripEnd(statsObj.toString(), null); } return statsObj; }
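// Illustrative trace (not part of the original source): for a row group whose
// integer statistics are min = 10 and max = 20, the predicate "x < 10"
// compares the literal to the converted range above, lands at Location.MIN,
// and returns NO_NULL, so the group can be skipped. By contrast, "x = 15"
// lands at Location.MIDDLE and returns YES_NO_NULL: the statistics can prove
// absence, but never presence, so the group must still be read.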
/** * Pick the row groups that we need to load from the current stripe. * @return an array with a boolean for each row group or null if all of the row groups must be read * @throws IOException */
private boolean[] pickRowGroups() throws IOException { // if we don't have a sarg or indexes, we read everything if (sarg == null || rowIndexStride == 0) { return null; } readRowIndex(currentStripe); long rowsInStripe = stripes.get(currentStripe).getNumberOfRows(); int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride); boolean[] result = new boolean[groupsInStripe]; TruthValue[] leafValues = new TruthValue[sargLeaves.size()]; for (int rowGroup = 0; rowGroup < result.length; ++rowGroup) { for (int pred = 0; pred < leafValues.length; ++pred) { if (filterColumns[pred] != -1) { OrcProto.ColumnStatistics stats = indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics(); leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred)); if (LOG.isDebugEnabled()) { LOG.debug("Stats = " + stats); LOG.debug("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]); } } else { // the column is a virtual column leafValues[pred] = TruthValue.YES_NO_NULL; } } result[rowGroup] = sarg.evaluate(leafValues).isNeeded(); if (LOG.isDebugEnabled()) { LOG.debug("Row group " + (rowIndexStride * rowGroup) + " to " + (rowIndexStride * (rowGroup + 1) - 1) + " is " + (result[rowGroup] ? "" : "not ") + "included."); } } // if we found something to skip, use the array. Otherwise, return null. for (boolean b : result) { if (!b) { return result; } } return null; }
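// Worked example (illustrative only): with rowIndexStride = 10000 and a
// stripe of 25000 rows, groupsInStripe = (25000 + 9999) / 10000 = 3, where
// the last group covers only 5000 rows. If the sarg eliminates groups 0 and
// 2, pickRowGroups returns [false, true, false]; if no group can be
// eliminated, it returns null so the caller takes the cheaper
// read-everything path.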
private void clearStreams() throws IOException { // explicit close of all streams to de-ref ByteBuffers for (InStream is : streams.values()) { is.close(); } if (bufferChunks != null) { if (zcr != null) { for (BufferChunk bufChunk : bufferChunks) { zcr.releaseBuffer(bufChunk.chunk); } } bufferChunks.clear(); } streams.clear(); }
/** * Read the current stripe into memory. * @throws IOException */
private void readStripe() throws IOException { StripeInformation stripe = stripes.get(currentStripe); stripeFooter = readStripeFooter(stripe); clearStreams(); // setup the position in the stripe rowCountInStripe = stripe.getNumberOfRows(); rowInStripe = 0; rowBaseInStripe = 0; for (int i = 0; i < currentStripe; ++i) { rowBaseInStripe += stripes.get(i).getNumberOfRows(); } // reset all of the indexes for (int i = 0; i < indexes.length; ++i) { indexes[i] = null; } includedRowGroups = pickRowGroups(); // move forward to the first unskipped row if (includedRowGroups != null) { while (rowInStripe < rowCountInStripe && !includedRowGroups[(int) (rowInStripe / rowIndexStride)]) { rowInStripe = Math.min(rowCountInStripe, rowInStripe + rowIndexStride); } } // if we haven't skipped the whole stripe, read the data if (rowInStripe < rowCountInStripe) { // if we aren't projecting columns or filtering rows, just read it all if (included == null && includedRowGroups == null) { readAllDataStreams(stripe); } else { readPartialDataStreams(stripe); } reader.startStripe(streams, stripeFooter.getColumnsList()); // if we skipped the first row group, move the pointers forward if (rowInStripe != 0) { seekToRowEntry((int) (rowInStripe / rowIndexStride)); } } }
private void readAllDataStreams(StripeInformation stripe) throws IOException { long start = stripe.getIndexLength(); long end = start + stripe.getDataLength(); // explicitly trigger 1 big read DiskRange[] ranges = new DiskRange[] { new DiskRange(start, end) }; bufferChunks = readDiskRanges(file, stripe.getOffset(), Arrays.asList(ranges)); List<OrcProto.Stream> streamDescriptions = stripeFooter.getStreamsList(); createStreams(streamDescriptions, bufferChunks, null, codec, bufferSize, streams); }
/** * The sections of the stripe that we need to read. */
static class DiskRange { /** the first address we need to read. */ long offset; /** the first address afterwards. */ long end; DiskRange(long offset, long end) { this.offset = offset; this.end = end; if (end < offset) { throw new IllegalArgumentException("invalid range " + this); } } @Override public boolean equals(Object other) { if (other == null || other.getClass() != getClass()) { return false; } DiskRange otherR = (DiskRange) other; return otherR.offset == offset && otherR.end == end; } @Override public String toString() { return "range start: " + offset + " end: " + end; } }
/** * The sections of the stripe that we have read. * This might not match diskRanges: one disk range can span multiple buffer chunks, depending on DFS block boundaries. */
static class BufferChunk { final ByteBuffer chunk; /** the first address we need to read. */ final long offset; /** end of the buffer */ final long end; BufferChunk(ByteBuffer chunk, long offset) { this.offset = offset; this.chunk = chunk; end = offset + chunk.remaining(); } @Override public final String toString() { return "range start: " + offset + " size: " + chunk.remaining() + " type: " + (chunk.isDirect() ? "direct" : "array-backed"); } }
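// Illustrative note (not part of the original source): a single requested
// DiskRange can come back as several BufferChunks, e.g. a zero-copy read of
// 200MB that straddles an HDFS block boundary may yield one chunk of
// 134217728 bytes (128MB) at offset 0 and a second chunk at offset 134217728;
// createStreams below re-slices whichever chunks overlap each stream.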
private static final int BYTE_STREAM_POSITIONS = 1; private static final int RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1; private static final int BITFIELD_POSITIONS = RUN_LENGTH_BYTE_POSITIONS + 1; private static final int RUN_LENGTH_INT_POSITIONS = BYTE_STREAM_POSITIONS + 1;
/** * Get the offset within the index positions at which the given stream starts. * @param encoding the encoding of the column * @param type the type of the column * @param stream the kind of the stream * @param isCompressed is the file compressed * @param hasNulls does the column have a PRESENT stream? * @return the number of positions that will be used for that stream */
static int getIndexPosition(OrcProto.ColumnEncoding.Kind encoding, OrcProto.Type.Kind type, OrcProto.Stream.Kind stream, boolean isCompressed, boolean hasNulls) { if (stream == OrcProto.Stream.Kind.PRESENT) { return 0; } int compressionValue = isCompressed ? 1 : 0; int base = hasNulls ? (BITFIELD_POSITIONS + compressionValue) : 0; switch (type) { case BOOLEAN: case BYTE: case SHORT: case INT: case LONG: case FLOAT: case DOUBLE: case DATE: case STRUCT: case MAP: case LIST: case UNION: return base; case CHAR: case VARCHAR: case STRING: if (encoding == OrcProto.ColumnEncoding.Kind.DICTIONARY || encoding == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) { return base; } else { if (stream == OrcProto.Stream.Kind.DATA) { return base; } else { return base + BYTE_STREAM_POSITIONS + compressionValue; } } case BINARY: if (stream == OrcProto.Stream.Kind.DATA) { return base; } return base + BYTE_STREAM_POSITIONS + compressionValue; case DECIMAL: if (stream == OrcProto.Stream.Kind.DATA) { return base; } return base + BYTE_STREAM_POSITIONS + compressionValue; case TIMESTAMP: if (stream == OrcProto.Stream.Kind.DATA) { return base; } return base + RUN_LENGTH_INT_POSITIONS + compressionValue; default: throw new IllegalArgumentException("Unknown type " + type); } }
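// Worked example (illustrative only): for an uncompressed file
// (compressionValue = 0) and a nullable DIRECT-encoded string column
// (base = BITFIELD_POSITIONS + 0 = 3, on account of the PRESENT stream), the
// LENGTH stream is not DATA, so its index position is
// base + BYTE_STREAM_POSITIONS + compressionValue = 3 + 1 + 0 = 4.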
// for uncompressed streams, what is the most overlap with the following set of rows (long vint literal group).
static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;
/** * Is this stream part of a dictionary? * @return is this part of a dictionary? */
static boolean isDictionary(OrcProto.Stream.Kind kind, OrcProto.ColumnEncoding encoding) { OrcProto.ColumnEncoding.Kind encodingKind = encoding.getKind(); return kind == OrcProto.Stream.Kind.DICTIONARY_DATA || (kind == OrcProto.Stream.Kind.LENGTH && (encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY || encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2)); }
/** * Plan the ranges of the file that we need to read given the list of columns and row groups. * @param streamList the list of streams available * @param indexes the indexes that have been loaded * @param includedColumns which columns are needed * @param includedRowGroups which row groups are needed * @param isCompressed does the file have generic compression * @param encodings the encodings for each column * @param types the types of the columns * @param compressionSize the compression block size * @return the list of disk ranges that will be loaded */
static List<DiskRange> planReadPartialDataStreams(List<OrcProto.Stream> streamList, OrcProto.RowIndex[] indexes, boolean[] includedColumns, boolean[] includedRowGroups, boolean isCompressed, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Type> types, int compressionSize) { List<DiskRange> result = new ArrayList<DiskRange>(); long offset = 0; // figure out which columns have a present stream boolean[] hasNull = new boolean[types.size()]; for (OrcProto.Stream stream : streamList) { if (stream.getKind() == OrcProto.Stream.Kind.PRESENT) { hasNull[stream.getColumn()] = true; } } for (OrcProto.Stream stream : streamList) { long length = stream.getLength(); int column = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); if (StreamName.getArea(streamKind) == StreamName.Area.DATA && includedColumns[column]) { // if we aren't filtering or it is a dictionary, load it. if (includedRowGroups == null || isDictionary(streamKind, encodings.get(column))) { result.add(new DiskRange(offset, offset + length)); } else { for (int group = 0; group < includedRowGroups.length; ++group) { if (includedRowGroups[group]) { int posn = getIndexPosition(encodings.get(column).getKind(), types.get(column).getKind(), stream.getKind(), isCompressed, hasNull[column]); long start = indexes[column].getEntry(group).getPositions(posn); final long nextGroupOffset; if (group < includedRowGroups.length - 1) { nextGroupOffset = indexes[column].getEntry(group + 1).getPositions(posn); } else { nextGroupOffset = length; } // figure out the worst case last location // if adjacent groups have the same compressed block offset then stretch the slop // by a factor of 2 to safely accommodate the next compression block: // one for the current compression block and another for the next compression block. final long slop = isCompressed ? 2 * (OutStream.HEADER_SIZE + compressionSize) : WORST_UNCOMPRESSED_SLOP; long end = (group == includedRowGroups.length - 1) ? length : Math.min(length, nextGroupOffset + slop); result.add(new DiskRange(offset + start, offset + end)); } } } } offset += length; } return result; }
/** * Update the disk ranges to collapse adjacent or overlapping ranges. It assumes that the ranges are sorted. * @param ranges the list of disk ranges to merge */
static void mergeDiskRanges(List<DiskRange> ranges) { DiskRange prev = null; for (int i = 0; i < ranges.size(); ++i) { DiskRange current = ranges.get(i); if (prev != null && overlap(prev.offset, prev.end, current.offset, current.end)) { prev.offset = Math.min(prev.offset, current.offset); prev.end = Math.max(prev.end, current.end); ranges.remove(i); i -= 1; } else { prev = current; } } }
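// Illustrative trace (not part of the original source): given the sorted
// ranges 0-100, 90-150, and 300-400, mergeDiskRanges collapses the two
// overlapping entries into 0-150 and leaves 300-400 alone, so one seek and
// one read replace two overlapping requests while the distant range is still
// fetched separately.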
/** * Read the list of ranges from the file. * @param file the file to read * @param base the base of the stripe * @param ranges the disk ranges within the stripe to read * @return the bytes read for each disk range, which is the same length as ranges * @throws IOException */
List<BufferChunk> readDiskRanges(FSDataInputStream file, long base, List<DiskRange> ranges) throws IOException { ArrayList<BufferChunk> result = new ArrayList<RecordReaderImpl.BufferChunk>(ranges.size()); for (DiskRange range : ranges) { int len = (int) (range.end - range.offset); long off = range.offset; file.seek(base + off); if (zcr != null) { while (len > 0) { ByteBuffer partial = zcr.readBuffer(len, false); result.add(new BufferChunk(partial, off)); int read = partial.remaining(); len -= read; off += read; } } else { byte[] buffer = new byte[len]; file.readFully(buffer, 0, buffer.length); result.add(new BufferChunk(ByteBuffer.wrap(buffer), range.offset)); } } return result; }
/** * Does region A overlap region B? The end points are inclusive on both sides. * @param leftA A's left point * @param rightA A's right point * @param leftB B's left point * @param rightB B's right point * @return Does region A overlap region B? */
static boolean overlap(long leftA, long rightA, long leftB, long rightB) { if (leftA <= leftB) { return rightA >= leftB; } return rightB >= leftA; }
/** * Build a string representation of a list of disk ranges. * @param ranges ranges to stringify * @return the resulting string */
static String stringifyDiskRanges(List<DiskRange> ranges) { StringBuilder buffer = new StringBuilder(); buffer.append("["); for (int i = 0; i < ranges.size(); ++i) { if (i != 0) { buffer.append(", "); } buffer.append(ranges.get(i).toString()); } buffer.append("]"); return buffer.toString(); }
static void createStreams(List<OrcProto.Stream> streamDescriptions, List<BufferChunk> ranges, boolean[] includeColumn, CompressionCodec codec, int bufferSize, Map<StreamName, InStream> streams) throws IOException { long offset = 0; for (OrcProto.Stream streamDesc : streamDescriptions) { int column = streamDesc.getColumn(); if ((includeColumn == null || includeColumn[column]) && StreamName.getArea(streamDesc.getKind()) == StreamName.Area.DATA) { long length = streamDesc.getLength(); int first = -1; int last = -2; for (int i = 0; i < ranges.size(); ++i) { BufferChunk range = ranges.get(i); if (overlap(offset, offset + length, range.offset, range.end)) { if (first == -1) { first = i; } last = i; } } ByteBuffer[] buffers = new ByteBuffer[last - first + 1]; long[] offsets = new long[last - first + 1]; for (int i = 0; i < buffers.length; ++i) { BufferChunk range = ranges.get(i + first); long start = Math.max(range.offset, offset); long end = Math.min(range.end, offset + length); buffers[i] = range.chunk.slice(); assert range.chunk.position() == 0; // otherwise we'll mix up positions /* Buffers are positioned inwards if offset > range.offset; otherwise offsets[i] == range.offset - offset. */ if (offset > range.offset) { buffers[i].position((int) (offset - range.offset)); buffers[i].limit((int) (end - range.offset)); offsets[i] = 0; } else { buffers[i].position(0); buffers[i].limit((int) (end - range.offset)); offsets[i] = (range.offset - offset); } } StreamName name = new StreamName(column, streamDesc.getKind()); streams.put(name, InStream.create(name.toString(), buffers, offsets, length, codec, bufferSize)); } offset += streamDesc.getLength(); } }
private void readPartialDataStreams(StripeInformation stripe) throws IOException {
List<OrcProto.Stream> streamList = stripeFooter.getStreamsList(); List<DiskRange> chunks = planReadPartialDataStreams(streamList, indexes, included, includedRowGroups, codec != null, stripeFooter.getColumnsList(), types, bufferSize); if (LOG.isDebugEnabled()) { LOG.debug("chunks = " + stringifyDiskRanges(chunks)); } mergeDiskRanges(chunks); if (LOG.isDebugEnabled()) { LOG.debug("merge = " + stringifyDiskRanges(chunks)); } bufferChunks = readDiskRanges(file, stripe.getOffset(), chunks); createStreams(streamList, bufferChunks, included, codec, bufferSize, streams); }
@Override public boolean hasNext() throws IOException { return rowInStripe < rowCountInStripe; }
/** * Read the next stripe until we find a row that we don't skip. * @throws IOException */
private void advanceStripe() throws IOException { rowInStripe = rowCountInStripe; while (rowInStripe >= rowCountInStripe && currentStripe < stripes.size() - 1) { currentStripe += 1; readStripe(); } }
/** * Skip over rows that we aren't selecting, so that the next row is one that we will read. * @param nextRow the row we want to go to * @throws IOException */
private void advanceToNextRow(long nextRow) throws IOException { long nextRowInStripe = nextRow - rowBaseInStripe; // check for row skipping if (rowIndexStride != 0 && includedRowGroups != null && nextRowInStripe < rowCountInStripe) { int rowGroup = (int) (nextRowInStripe / rowIndexStride); if (!includedRowGroups[rowGroup]) { while (rowGroup < includedRowGroups.length && !includedRowGroups[rowGroup]) { rowGroup += 1; } // if we are off the end of the stripe, just move stripes if (rowGroup >= includedRowGroups.length) { advanceStripe(); return; } nextRowInStripe = Math.min(rowCountInStripe, rowGroup * rowIndexStride); } } if (nextRowInStripe < rowCountInStripe) { if (nextRowInStripe != rowInStripe) { if (rowIndexStride != 0) { int rowGroup = (int) (nextRowInStripe / rowIndexStride); seekToRowEntry(rowGroup); reader.skipRows(nextRowInStripe - rowGroup * rowIndexStride); } else { reader.skipRows(nextRowInStripe - rowInStripe); } rowInStripe = nextRowInStripe; } } else { advanceStripe(); } }
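// Illustrative trace (not part of the original source): with rowIndexStride =
// 10000 and includedRowGroups = [false, true], advanceToNextRow(rowBaseInStripe)
// finds row group 0 excluded, advances to group 1, seeks the readers to index
// entry 1, and leaves rowInStripe at 10000; had every remaining group been
// excluded, it would have moved on to the next stripe instead.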
/** * Read the next row of the file. Invoked from OrcRecordReader.nextKeyValue(). */
@Override public Object next(Object previous) throws IOException { Object result = reader.next(previous); // find the next row rowInStripe += 1; advanceToNextRow(rowInStripe + rowBaseInStripe); if (isLogTraceEnabled) { LOG.trace("row from " + reader.path); LOG.trace("orc row = " + result); } return result; }
@Override public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOException { VectorizedRowBatch result = null; if (rowInStripe >= rowCountInStripe) { currentStripe += 1; readStripe(); } long batchSize = 0; // In case of PPD, the batch size should be aware of row-group boundaries. If only a subset of row // groups is selected, the marker position is set to the end of that range (the subset of row groups // within the stripe). Computing the batch size from the marker position ensures that the batch // respects the row-group boundary and will not overflow when reading rows; an illustration of // this case is at https://issues.apache.org/jira/browse/HIVE-6287 if (rowIndexStride != 0 && includedRowGroups != null && rowInStripe < rowCountInStripe) { int startRowGroup = (int) (rowInStripe / rowIndexStride); if (!includedRowGroups[startRowGroup]) { while (startRowGroup < includedRowGroups.length && !includedRowGroups[startRowGroup]) { startRowGroup += 1; } } int endRowGroup = startRowGroup; while (endRowGroup < includedRowGroups.length && includedRowGroups[endRowGroup]) { endRowGroup += 1; } final long markerPosition = (endRowGroup * rowIndexStride) < rowCountInStripe ? (endRowGroup * rowIndexStride) : rowCountInStripe; batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (markerPosition - rowInStripe)); if (LOG.isDebugEnabled() && batchSize < VectorizedRowBatch.DEFAULT_SIZE) { LOG.debug("markerPosition: " + markerPosition + " batchSize: " + batchSize); } } else { batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (rowCountInStripe - rowInStripe)); } rowInStripe += batchSize; if (previous == null) { ColumnVector[] cols = (ColumnVector[]) reader.nextVector(null, (int) batchSize); result = new VectorizedRowBatch(cols.length); result.cols = cols; } else { result = (VectorizedRowBatch) previous; result.selectedInUse = false; reader.nextVector(result.cols, (int) batchSize); } result.size = (int) batchSize; advanceToNextRow(rowInStripe + rowBaseInStripe); return result; }
@Override public void close() throws IOException { clearStreams(); pool.clear(); file.close(); }
@Override public long getRowNumber() { return rowInStripe + rowBaseInStripe + firstRow; }
/** * Return the fraction of rows that have been read from the selected section of the file. * @return fraction between 0.0 and 1.0 of rows consumed */
@Override public float getProgress() { return ((float) rowBaseInStripe + rowInStripe) / totalRowCount; }
private int findStripe(long rowNumber) { for (int i = 0; i < stripes.size(); i++) { StripeInformation stripe = stripes.get(i); if (stripe.getNumberOfRows() > rowNumber) { return i; } rowNumber -= stripe.getNumberOfRows(); } throw new IllegalArgumentException("Seek after the end of reader range"); }
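// Worked example (illustrative only): with stripes of 1000, 1000, and 500
// rows, findStripe(1700) subtracts 1000 for stripe 0, sees 700 < 1000, and
// returns stripe index 1, leaving the row at offset 700 within that stripe;
// asking for row 2500 or beyond falls off the end and throws
// IllegalArgumentException.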
OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException { long offset = stripes.get(stripeIndex).getOffset(); OrcProto.StripeFooter stripeFooter; OrcProto.RowIndex[] indexes; // if this is the current stripe, use the cached objects. if (stripeIndex == currentStripe) { stripeFooter = this.stripeFooter; indexes = this.indexes; } else { stripeFooter = readStripeFooter(stripes.get(stripeIndex)); indexes = new OrcProto.RowIndex[this.indexes.length]; } for (OrcProto.Stream stream : stripeFooter.getStreamsList()) { if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) { int col = stream.getColumn(); if ((included == null || included[col]) && indexes[col] == null) { byte[] buffer = new byte[(int) stream.getLength()]; file.seek(offset); file.readFully(buffer); indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index", new ByteBuffer[] { ByteBuffer.wrap(buffer) }, new long[] { 0 }, stream.getLength(), codec, bufferSize)); } } offset += stream.getLength(); } return indexes; }
private void seekToRowEntry(int rowEntry) throws IOException { PositionProvider[] index = new PositionProvider[indexes.length]; for (int i = 0; i < indexes.length; ++i) { if (indexes[i] != null) { index[i] = new PositionProviderImpl(indexes[i].getEntry(rowEntry)); } } reader.seek(index); }
@Override public void seekToRow(long rowNumber) throws IOException { if (rowNumber < 0) { throw new IllegalArgumentException("Seek to a negative row number " + rowNumber); } else if (rowNumber < firstRow) { throw new IllegalArgumentException("Seek before reader range " + rowNumber); } // convert to our internal form (rows from the beginning of slice) rowNumber -= firstRow; // move to the right stripe int rightStripe = findStripe(rowNumber); if (rightStripe != currentStripe) { currentStripe = rightStripe; readStripe(); } readRowIndex(currentStripe); // if we aren't at the right row yet, advance in the stripe. advanceToNextRow(rowNumber); } }
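// Usage sketch (illustrative, not part of the original source): a minimal
// read loop over this reader, assuming the companion OrcFile and Reader
// classes in this package mirror the standard Hive ORC API; the path below is
// a hypothetical placeholder.
//
//   Configuration conf = new Configuration();
//   Path path = new Path("/tmp/example.orc");            // hypothetical file
//   Reader orcReader = OrcFile.createReader(path.getFileSystem(conf), path);
//   RecordReader rows = orcReader.rows(null);            // no column pruning
//   Object row = null;
//   while (rows.hasNext()) {
//     row = rows.next(row);                              // reuse row storage
//   }
//   rows.close();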