Java tutorial
/* * Copyright (c) 2012 - 2017 Splice Machine, Inc. * * This file is part of Splice Machine. * Splice Machine is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either * version 3, or (at your option) any later version. * Splice Machine is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Affero General Public License for more details. * You should have received a copy of the GNU Affero General Public License along with Splice Machine. * If not, see <http://www.gnu.org/licenses/>. */ package com.splicemachine.orc.predicate; import com.splicemachine.db.iapi.error.StandardException; import com.splicemachine.db.iapi.services.io.ArrayUtil; import com.splicemachine.db.iapi.store.access.Qualifier; import com.splicemachine.db.iapi.types.*; import com.splicemachine.orc.OrcPredicate; import com.splicemachine.orc.metadata.*; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.SerializationUtils; import org.apache.parquet.column.statistics.LongStatistics; import org.apache.spark.sql.types.*; import org.apache.spark.sql.types.DataType; import java.io.*; import java.util.HashMap; import java.util.Map; import java.util.List; /** * Qualifiers in Conjunctive Normal Form... * * */ public class SpliceORCPredicate implements OrcPredicate, Externalizable { private Qualifier[][] qualifiers; int[] baseColumnMap; StructType structType; public SpliceORCPredicate() { } public SpliceORCPredicate(Qualifier[][] qualifiers, int[] baseColumnMap, StructType structType) { this.qualifiers = qualifiers; this.baseColumnMap = baseColumnMap; this.structType = structType; } @Override public void writeExternal(ObjectOutput out) throws IOException { ArrayUtil.writeIntArray(out, baseColumnMap); out.writeBoolean(qualifiers != null); if (qualifiers != null) { out.writeInt(qualifiers.length); out.writeInt(qualifiers[0].length); for (int i = 0; i < qualifiers[0].length; i++) { out.writeObject(qualifiers[0][i]); } for (int and_idx = 1; and_idx < qualifiers.length; and_idx++) { out.writeInt(qualifiers[and_idx].length); for (int or_idx = 0; or_idx < qualifiers[and_idx].length; or_idx++) { out.writeObject(qualifiers[and_idx][or_idx]); } } } out.writeObject(structType.json()); } @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { baseColumnMap = ArrayUtil.readIntArray(in); if (in.readBoolean()) { qualifiers = new Qualifier[in.readInt()][]; qualifiers[0] = new Qualifier[in.readInt()]; for (int i = 0; i < qualifiers[0].length; i++) { qualifiers[0][i] = (Qualifier) in.readObject(); } for (int and_idx = 1; and_idx < qualifiers.length; and_idx++) { qualifiers[and_idx] = new Qualifier[in.readInt()]; for (int or_idx = 0; or_idx < qualifiers[and_idx].length; or_idx++) { qualifiers[and_idx][or_idx] = (Qualifier) in.readObject(); } } } structType = (StructType) StructType.fromJson((String) in.readObject()); } @Override public boolean matches(long numberOfRows, Map<Integer, ColumnStatistics> statisticsByColumnIndex) { try { boolean row_qualifies = true; if (qualifiers == null) return numberOfRows > 0 ? true : false; for (int i = 0; i < qualifiers[0].length; i++) { Qualifier q = qualifiers[0][i]; if (q.getVariantType() == Qualifier.VARIANT) continue; // Cannot Push Down Qualifier StatsEval statsEval = statsEval(numberOfRows, statisticsByColumnIndex.get(q.getStoragePosition()), structType.fields()[baseColumnMap[q.getStoragePosition()]].dataType()); if (statsEval == null || statsEval.maximumDVD == null || statsEval.minimumDVD == null) return true; if (q.getOrderable() == null || q.getOrderable().isNull()) { if (!statsEval.hasNulls) return false; continue; } switch (q.getOperator()) { case com.splicemachine.db.iapi.types.DataType.ORDER_OP_LESSTHAN: case com.splicemachine.db.iapi.types.DataType.ORDER_OP_LESSOREQUALS: case com.splicemachine.db.iapi.types.DataType.ORDER_OP_GREATERTHAN: case com.splicemachine.db.iapi.types.DataType.ORDER_OP_GREATEROREQUALS: row_qualifies = statsEval.minimumDVD.compare(q.getOperator(), q.getOrderable(), q.getOrderedNulls(), q.getUnknownRV()) || statsEval.maximumDVD.compare(q.getOperator(), q.getOrderable(), q.getOrderedNulls(), q.getUnknownRV()); break; case com.splicemachine.db.iapi.types.DataType.ORDER_OP_EQUALS: row_qualifies = statsEval.minimumDVD.compare( com.splicemachine.db.iapi.types.DataType.ORDER_OP_LESSOREQUALS, q.getOrderable(), q.getOrderedNulls(), q.getUnknownRV()) && statsEval.maximumDVD.compare( com.splicemachine.db.iapi.types.DataType.ORDER_OP_GREATEROREQUALS, q.getOrderable(), q.getOrderedNulls(), q.getUnknownRV()); break; } if (q.negateCompareResult()) row_qualifies = !row_qualifies; if (!row_qualifies) return (false); } return true; } catch (Exception e) { throw new RuntimeException(e); } /* // all the qual[0] and terms passed, now process the OR clauses for (int and_idx = 1; and_idx < qual_list.length; and_idx++) { Column orCols = null; for (int or_idx = 0; or_idx < qual_list[and_idx].length; or_idx++) { Qualifier q = qual_list[and_idx][or_idx]; if (q.getVariantType() == Qualifier.VARIANT) continue; // Cannot Push Down Qualifier q.clearOrderableCache(); Column orCol = dataset.col(allColIdInSpark[(baseColumnMap != null ? baseColumnMap[q.getStoragePosition()] : q.getStoragePosition())]); Object value = q.getOrderable().getObject(); switch (q.getOperator()) { case DataType.ORDER_OP_LESSTHAN: orCol = q.negateCompareResult() ? orCol.geq(value) : orCol.lt(value); break; case DataType.ORDER_OP_LESSOREQUALS: orCol = q.negateCompareResult() ? orCol.gt(value) : orCol.leq(value); break; case DataType.ORDER_OP_GREATERTHAN: orCol = q.negateCompareResult() ? orCol.leq(value) : orCol.gt(value); break; case DataType.ORDER_OP_GREATEROREQUALS: orCol = q.negateCompareResult() ? orCol.lt(value) : orCol.geq(value); break; case DataType.ORDER_OP_EQUALS: orCol = q.negateCompareResult() ? orCol.notEqual(value) : orCol.equalTo(value); break; } if (orCols == null) orCols = orCol; else orCols = orCols.or(orCol); } } } catch (Exception e) { throw new RuntimeException(e); } */ } public String serialize() throws IOException { return Base64.encodeBase64String(SerializationUtils.serialize(this)); } public static SpliceORCPredicate deserialize(String base64String) throws IOException { return (SpliceORCPredicate) SerializationUtils.deserialize(Base64.decodeBase64(base64String)); } public static Map<Integer, ColumnStatistics> partitionStatsEval(List<Integer> baseColumnMap, StructType rowStruct, List<Integer> partitionColumns, String[] values) { try { Map<Integer, ColumnStatistics> partitionStatistics = new HashMap<>(partitionColumns.size()); for (int i = 0; i < partitionColumns.size(); i++) { int storagePos = partitionColumns.get(i); if (storagePos >= baseColumnMap.size()) continue; int j = baseColumnMap.get(storagePos); if (j == -1) // Partition Column Not In List... continue; DataType dataType = rowStruct.fields()[j].dataType(); if (dataType instanceof BooleanType) { partitionStatistics.put(storagePos, BooleanStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof DecimalType) { partitionStatistics.put(storagePos, DecimalStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof StringType) { partitionStatistics.put(storagePos, StringStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof DateType) { partitionStatistics.put(storagePos, DateStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof IntegerType) { partitionStatistics.put(storagePos, IntegerStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof LongType) { partitionStatistics.put(storagePos, IntegerStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof DoubleType) { partitionStatistics.put(storagePos, DoubleStatistics.getPartitionColumnStatistics(values[i])); } else if (dataType instanceof FloatType) { partitionStatistics.put(storagePos, DoubleStatistics.getPartitionColumnStatistics(values[i])); } else { } } return partitionStatistics; } catch (Exception se) { throw new RuntimeException(se); } } public StatsEval statsEval(long numberOfRows, ColumnStatistics columnStatistics, DataType dataType) throws StandardException { StatsEval statsEval = new StatsEval(); if (numberOfRows == 0) { statsEval.alwaysFalse = true; return statsEval; } if (columnStatistics == null) { statsEval.alwaysTrue = true; return statsEval; } if (columnStatistics.hasNumberOfValues() && columnStatistics.getNumberOfValues() == 0) { statsEval.allNulls = true; return statsEval; } statsEval.hasNulls = columnStatistics.getNumberOfValues() != numberOfRows; if (dataType instanceof BooleanType && columnStatistics.getBooleanStatistics() != null) { BooleanStatistics booleanStatistics = columnStatistics.getBooleanStatistics(); boolean hasTrueValues = (booleanStatistics.getTrueValueCount() != 0); boolean hasFalseValues = (columnStatistics.getNumberOfValues() != booleanStatistics .getTrueValueCount()); if (hasTrueValues && hasFalseValues) { statsEval.minimumDVD = new SQLBoolean(false); statsEval.maximumDVD = new SQLBoolean(true); } if (hasTrueValues) { statsEval.minimumDVD = new SQLBoolean(true); statsEval.maximumDVD = new SQLBoolean(true); } if (hasFalseValues) { statsEval.minimumDVD = new SQLBoolean(false); statsEval.maximumDVD = new SQLBoolean(false); } return statsEval; } else if (dataType instanceof DecimalType) { DecimalStatistics decimalStatistics = columnStatistics.getDecimalStatistics(); statsEval.minimumDVD = new SQLDecimal(decimalStatistics.getMin()); statsEval.maximumDVD = new SQLDecimal(decimalStatistics.getMax()); } else if (dataType instanceof StringType) { StringStatistics stringStatistics = columnStatistics.getStringStatistics(); statsEval.minimumDVD = new SQLVarchar(stringStatistics.getMin().toStringUtf8()); statsEval.maximumDVD = new SQLVarchar(stringStatistics.getMax().toStringUtf8()); } else if (dataType instanceof DateType) { DateStatistics dateStatistics = columnStatistics.getDateStatistics(); statsEval.minimumDVD = new SQLDate(dateStatistics.getMin()); statsEval.maximumDVD = new SQLDate(dateStatistics.getMax()); } else if (dataType instanceof IntegerType) { IntegerStatistics integerStatistics = columnStatistics.getIntegerStatistics(); statsEval.minimumDVD = new SQLInteger(integerStatistics.getMin().intValue()); statsEval.maximumDVD = new SQLInteger(integerStatistics.getMax().intValue()); } else if (dataType instanceof LongType) { IntegerStatistics integerStatistics = columnStatistics.getIntegerStatistics(); statsEval.minimumDVD = new SQLLongint(integerStatistics.getMin()); statsEval.maximumDVD = new SQLLongint(integerStatistics.getMax()); } else if (dataType instanceof DoubleType) { DoubleStatistics doubleStatistics = columnStatistics.getDoubleStatistics(); statsEval.minimumDVD = new SQLDouble(doubleStatistics.getMin()); statsEval.maximumDVD = new SQLDouble(doubleStatistics.getMax()); } else if (dataType instanceof FloatType) { DoubleStatistics doubleStatistics = columnStatistics.getDoubleStatistics(); statsEval.minimumDVD = new SQLReal(doubleStatistics.getMin().floatValue()); statsEval.maximumDVD = new SQLReal(doubleStatistics.getMax().floatValue()); } else { statsEval.alwaysTrue = true; } return statsEval; } }