Java tutorial
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.common.data;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
import com.linkedin.pinot.common.Utils;
import com.linkedin.pinot.common.config.ConfigKey;
import com.linkedin.pinot.common.config.ConfigNodeLifecycleAware;
import com.linkedin.pinot.common.utils.DataSchema;
import com.linkedin.pinot.common.utils.EqualityUtils;
import com.linkedin.pinot.common.utils.primitive.ByteArray;
import java.nio.charset.Charset;
import javax.annotation.Nonnull;
import org.apache.avro.Schema.Type;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;


/**
 * The <code>FieldSpec</code> class contains all specs related to any field (column) in {@link Schema}.
 * <p>Concrete subclasses include {@link DimensionFieldSpec}, {@link MetricFieldSpec} and {@link TimeFieldSpec};
 * the kind of field is reported by {@link #getFieldType()}.
 * <p>Specs stored are as followings:
 * <p>- <code>Name</code>: name of the field.
 * <p>- <code>DataType</code>: type of the data stored (e.g. INT, LONG, FLOAT, DOUBLE, STRING, BYTES).
 * <p>- <code>IsSingleValueField</code>: single-value or multi-value field.
 * <p>- <code>DefaultNullValue</code>: when no value found for this field, use this value. Supplied in string format
 * and parsed according to the data type (BYTES values are parsed as hex strings).
 * <p>- <code>TransformFunction</code>: optional function used to generate this column.
 */
@SuppressWarnings("unused")
public abstract class FieldSpec implements Comparable<FieldSpec>, ConfigNodeLifecycleAware {
  private static final Charset UTF_8 = Charset.forName("UTF-8");

  // Default null values used for DIMENSION, TIME and DATE_TIME fields when none is configured.
  private static final Integer DEFAULT_DIMENSION_NULL_VALUE_OF_INT = Integer.MIN_VALUE;
  private static final Long DEFAULT_DIMENSION_NULL_VALUE_OF_LONG = Long.MIN_VALUE;
  private static final Float DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT = Float.NEGATIVE_INFINITY;
  private static final Double DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE = Double.NEGATIVE_INFINITY;
  private static final ByteArray DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES = new ByteArray(new byte[0]);
  private static final String DEFAULT_DIMENSION_NULL_VALUE_OF_STRING = "null";

  // Default null values used for METRIC fields when none is configured.
  private static final Integer DEFAULT_METRIC_NULL_VALUE_OF_INT = 0;
  private static final Long DEFAULT_METRIC_NULL_VALUE_OF_LONG = 0L;
  private static final Float DEFAULT_METRIC_NULL_VALUE_OF_FLOAT = 0.0F;
  private static final Double DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE = 0.0D;
  private static final String DEFAULT_METRIC_NULL_VALUE_OF_STRING = "null";
  private static final ByteArray DEFAULT_METRIC_NULL_VALUE_OF_BYTES = new ByteArray(new byte[0]);

  @ConfigKey("name")
  protected String _name;

  @ConfigKey("dataType")
  protected DataType _dataType;

  @ConfigKey("singleValue")
  protected boolean _isSingleValueField = true;

  // Typed default null value, derived from _stringDefaultNullValue (or the built-in default when that is null).
  protected Object _defaultNullValue;

  // String form of the configured default null value; null when no explicit value was supplied.
  @ConfigKey("defaultNullValue")
  private transient String _stringDefaultNullValue;

  // Transform function to generate this column, can be based on other columns
  protected String _transformFunction;

  // Default constructor required by JSON de-serializer. DO NOT REMOVE.
  public FieldSpec() {
  }

  /**
   * Creates a field spec that uses the built-in default null value for its field type and data type.
   *
   * @param name name of the field
   * @param dataType data type of the field; its stored type (see {@link DataType#getStoredType()}) is kept
   * @param isSingleValueField whether the field is single-valued
   */
  public FieldSpec(@Nonnull String name, @Nonnull DataType dataType, boolean isSingleValueField) {
    _name = name;
    _dataType = dataType.getStoredType();
    _isSingleValueField = isSingleValueField;
    _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, null);
  }

  /**
   * Creates a field spec with an explicit default null value.
   *
   * @param name name of the field
   * @param dataType data type of the field; its stored type (see {@link DataType#getStoredType()}) is kept
   * @param isSingleValueField whether the field is single-valued
   * @param defaultNullValue default null value; its <code>toString()</code> form is parsed according to the
   *                         stored data type
   */
  public FieldSpec(@Nonnull String name, @Nonnull DataType dataType, boolean isSingleValueField,
      @Nonnull Object defaultNullValue) {
    _name = name;
    _dataType = dataType.getStoredType();
    _isSingleValueField = isSingleValueField;
    _stringDefaultNullValue = defaultNullValue.toString();
    _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue);
  }

  /**
   * Returns the type of this field, as defined by the concrete subclass.
   */
  @Nonnull
  public abstract FieldType getFieldType();

  /**
   * Returns the name of the field.
   */
  @Nonnull
  public String getName() {
    return _name;
  }

  // Required by JSON de-serializer. DO NOT REMOVE.
  public void setName(@Nonnull String name) {
    _name = name;
  }

  /**
   * Returns the stored data type of the field.
   */
  @Nonnull
  public DataType getDataType() {
    return _dataType;
  }

  // Required by JSON de-serializer. DO NOT REMOVE.
  // Also re-derives the typed default null value, because its parsing depends on the data type.
  public void setDataType(@Nonnull DataType dataType) {
    _dataType = dataType.getStoredType();
    _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue);
  }

  /**
   * Returns <code>true</code> if the field is single-valued, <code>false</code> if it is multi-valued.
   */
  public boolean isSingleValueField() {
    return _isSingleValueField;
  }

  // Required by JSON de-serializer. DO NOT REMOVE.
  public void setSingleValueField(boolean isSingleValueField) {
    _isSingleValueField = isSingleValueField;
  }

  /**
   * Returns the typed default null value of the field.
   */
  @Nonnull
  public Object getDefaultNullValue() {
    return _defaultNullValue;
  }

  // Required by JSON de-serializer. DO NOT REMOVE.
  // The typed value can only be computed once the data type is known; when the data type has not been set yet,
  // only the string form is remembered and setDataType()/postInject() compute the typed value later.
  public void setDefaultNullValue(@Nonnull Object defaultNullValue) {
    _stringDefaultNullValue = defaultNullValue.toString();
    if (_dataType != null) {
      _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue);
    }
  }

  /**
   * Computes the typed default null value.
   * <p>When <code>stringDefaultNullValue</code> is non-null it is parsed according to <code>dataType</code>
   * (BYTES values are decoded from a hex string). Otherwise the built-in default for the given field type and
   * data type is returned.
   *
   * @param fieldType type of the field, used only when no explicit value is given
   * @param dataType stored data type of the field
   * @param stringDefaultNullValue string form of the explicit default null value, or <code>null</code>
   * @return the typed default null value
   * @throws NumberFormatException if an explicit numeric value cannot be parsed
   * @throws UnsupportedOperationException if the data type or field type is not supported
   */
  private static Object getDefaultNullValue(@Nonnull FieldType fieldType, @Nonnull DataType dataType,
      String stringDefaultNullValue) {
    if (stringDefaultNullValue != null) {
      switch (dataType) {
        case INT:
          return Integer.valueOf(stringDefaultNullValue);
        case LONG:
          return Long.valueOf(stringDefaultNullValue);
        case FLOAT:
          return Float.valueOf(stringDefaultNullValue);
        case DOUBLE:
          return Double.valueOf(stringDefaultNullValue);
        case STRING:
          return stringDefaultNullValue;
        case BYTES:
          try {
            return new ByteArray(Hex.decodeHex(stringDefaultNullValue.toCharArray()));
          } catch (DecoderException e) {
            Utils.rethrowException(e); // Re-throw to avoid handling exceptions in all callers.
            // NOTE(review): Utils.rethrowException is defined elsewhere and is presumed to always throw - confirm.
            // The explicit throw guarantees control can never fall through into the 'default' branch and report
            // a misleading "Unsupported data type: BYTES" error.
            throw new IllegalStateException(
                "Failed to decode hex default null value: " + stringDefaultNullValue, e);
          }
        default:
          throw new UnsupportedOperationException("Unsupported data type: " + dataType);
      }
    } else {
      switch (fieldType) {
        case METRIC:
          switch (dataType) {
            case INT:
              return DEFAULT_METRIC_NULL_VALUE_OF_INT;
            case LONG:
              return DEFAULT_METRIC_NULL_VALUE_OF_LONG;
            case FLOAT:
              return DEFAULT_METRIC_NULL_VALUE_OF_FLOAT;
            case DOUBLE:
              return DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE;
            case STRING:
              return DEFAULT_METRIC_NULL_VALUE_OF_STRING;
            case BYTES:
              return DEFAULT_METRIC_NULL_VALUE_OF_BYTES;
            default:
              throw new UnsupportedOperationException(
                  "Unknown default null value for metric field of data type: " + dataType);
          }
        case DIMENSION:
        case TIME:
        case DATE_TIME:
          switch (dataType) {
            case INT:
              return DEFAULT_DIMENSION_NULL_VALUE_OF_INT;
            case LONG:
              return DEFAULT_DIMENSION_NULL_VALUE_OF_LONG;
            case FLOAT:
              return DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT;
            case DOUBLE:
              return DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE;
            case STRING:
              return DEFAULT_DIMENSION_NULL_VALUE_OF_STRING;
            case BYTES:
              return DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES;
            default:
              throw new UnsupportedOperationException(
                  "Unknown default null value for dimension/time field of data type: " + dataType);
          }
        default:
          throw new UnsupportedOperationException("Unsupported field type: " + fieldType);
      }
    }
  }

  /**
   * Returns the transform function used to generate this column.
   *
   * @return the transform function if defined, <code>null</code> otherwise
   */
  public String getTransformFunction() {
    return _transformFunction;
  }

  // Required by JSON de-serializer. DO NOT REMOVE.
  public void setTransformFunction(@Nonnull String transformFunction) {
    _transformFunction = transformFunction;
  }

  /**
   * Returns the {@link JsonObject} representing the field spec.
   * <p>Only contains fields with non-default value.
   * <p>NOTE: here we use {@link JsonObject} to preserve the insertion order.
   */
  @Nonnull
  public JsonObject toJsonObject() {
    JsonObject jsonObject = new JsonObject();
    jsonObject.addProperty("name", _name);
    jsonObject.addProperty("dataType", _dataType.name());
    // Single-value is the default, so it is only written out for multi-value fields.
    if (!_isSingleValueField) {
      jsonObject.addProperty("singleValueField", false);
    }
    appendDefaultNullValue(jsonObject);
    return jsonObject;
  }

  /**
   * Adds the <code>defaultNullValue</code> property to the given JSON object, but only when a default null value
   * is set and differs from the built-in default for this field type and data type.
   * <p>Numeric values are written as JSON numbers, everything else via <code>toString()</code>.
   *
   * @param jsonObject JSON object to append to
   */
  protected void appendDefaultNullValue(@Nonnull JsonObject jsonObject) {
    if (_defaultNullValue == null) {
      return;
    }
    if (!_defaultNullValue.equals(getDefaultNullValue(getFieldType(), _dataType, null))) {
      if (_defaultNullValue instanceof Number) {
        jsonObject.add("defaultNullValue", new JsonPrimitive((Number) _defaultNullValue));
      } else {
        jsonObject.addProperty("defaultNullValue", _defaultNullValue.toString());
      }
    }
  }

  /**
   * Returns the Avro schema JSON for this field: the field name plus a union of <code>"null"</code> and the Avro
   * primitive type matching the stored data type.
   *
   * @return the Avro field schema as a {@link JsonObject}
   * @throws UnsupportedOperationException if the data type is not one of INT, LONG, FLOAT, DOUBLE, STRING
   */
  @Nonnull
  public JsonObject toAvroSchemaJsonObject() {
    JsonObject jsonSchema = new JsonObject();
    jsonSchema.addProperty("name", _name);
    String avroTypeName;
    switch (_dataType) {
      case INT:
        avroTypeName = "int";
        break;
      case LONG:
        avroTypeName = "long";
        break;
      case FLOAT:
        avroTypeName = "float";
        break;
      case DOUBLE:
        avroTypeName = "double";
        break;
      case STRING:
        avroTypeName = "string";
        break;
      default:
        throw new UnsupportedOperationException("Unsupported data type for Avro schema conversion: " + _dataType);
    }
    jsonSchema.add("type", convertStringsToJsonArray("null", avroTypeName));
    return jsonSchema;
  }

  /**
   * Converts the given strings into a {@link JsonArray} of string primitives, preserving their order.
   */
  private static JsonArray convertStringsToJsonArray(String... strings) {
    JsonArray jsonArray = new JsonArray();
    for (String string : strings) {
      jsonArray.add(new JsonPrimitive(string));
    }
    return jsonArray;
  }

  /**
   * Two field specs are equal when they are of the same class and have the same name, data type, single-value
   * flag and default null value. The transform function is not part of the comparison.
   */
  @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
  @Override
  public boolean equals(Object o) {
    if (EqualityUtils.isSameReference(this, o)) {
      return true;
    }

    if (EqualityUtils.isNullOrNotSameClass(this, o)) {
      return false;
    }

    FieldSpec that = (FieldSpec) o;
    return EqualityUtils.isEqual(_name, that._name)
        && EqualityUtils.isEqual(_dataType, that._dataType)
        && EqualityUtils.isEqual(_isSingleValueField, that._isSingleValueField)
        && EqualityUtils.isEqual(_defaultNullValue, that._defaultNullValue);
  }

  /**
   * Hash code over the same fields that {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    int result = EqualityUtils.hashCodeOf(_name);
    result = EqualityUtils.hashCodeOf(result, _dataType);
    result = EqualityUtils.hashCodeOf(result, _isSingleValueField);
    result = EqualityUtils.hashCodeOf(result, _defaultNullValue);
    return result;
  }

  @Override
  public void preInject() {
    // Nothing to do
  }

  @Override
  public void postInject() {
    // Compute the actual default null value from its string representation
    _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue);
  }

  /**
   * The <code>FieldType</code> enum is used to demonstrate the real world business logic for a column.
   * <p><code>DIMENSION</code>: columns used to filter records.
   * <p><code>METRIC</code>: columns used to apply aggregation on. <code>METRIC</code> field only contains numeric data.
   * <p><code>TIME</code>: time column (at most one per {@link Schema}). <code>TIME</code> field can be used to prune
   * segments, otherwise treated the same as <code>DIMENSION</code> field.
   * <p><code>DATE_TIME</code>: date-time column. Within this class it uses the same default null values as
   * <code>DIMENSION</code> and <code>TIME</code> fields.
   */
  public enum FieldType {
    DIMENSION, METRIC, TIME, DATE_TIME
  }

  /**
   * The <code>DataType</code> enum is used to demonstrate the data type of a field.
   */
  public enum DataType {
    INT,
    LONG,
    FLOAT,
    DOUBLE,
    BOOLEAN, // Stored as STRING
    STRING,
    BYTES;

    /**
     * Returns the data type stored in Pinot.
     * <p><code>BOOLEAN</code> is stored as <code>STRING</code>; every other type is stored as itself.
     */
    public DataType getStoredType() {
      return this == BOOLEAN ? STRING : this;
    }

    /**
     * Returns the data type stored in Pinot that is associated with the given Avro type.
     *
     * @param avroType Avro type to map
     * @return the matching stored data type (Avro BOOLEAN, STRING and ENUM all map to <code>STRING</code>)
     * @throws UnsupportedOperationException if the Avro type has no associated data type
     */
    public static DataType valueOf(Type avroType) {
      switch (avroType) {
        case INT:
          return INT;
        case LONG:
          return LONG;
        case FLOAT:
          return FLOAT;
        case DOUBLE:
          return DOUBLE;
        case BOOLEAN:
        case STRING:
        case ENUM:
          return STRING;
        case BYTES:
          return BYTES;
        default:
          throw new UnsupportedOperationException("Unsupported Avro type: " + avroType);
      }
    }

    /**
     * Returns the number of bytes needed to store the data type.
     *
     * @return the fixed size in bytes for numeric types, or {@link MetricFieldSpec#UNDEFINED_METRIC_SIZE} for
     *         <code>BYTES</code>
     * @throws IllegalStateException for data types without a defined size (e.g. STRING, BOOLEAN)
     */
    public int size() {
      switch (this) {
        case INT:
          return Integer.BYTES;
        case LONG:
          return Long.BYTES;
        case FLOAT:
          return Float.BYTES;
        case DOUBLE:
          return Double.BYTES;
        case BYTES:
          // TODO: Metric size is only used for Star-tree generation, which is not supported yet.
          return MetricFieldSpec.UNDEFINED_METRIC_SIZE;
        default:
          throw new IllegalStateException("Cannot get number of bytes for: " + this);
      }
    }
  }

  /**
   * Orders field specs by name only; note that this is coarser than {@link #equals(Object)}.
   */
  @Override
  public int compareTo(FieldSpec otherSpec) {
    // Sort fieldspecs based on their name
    return _name.compareTo(otherSpec._name);
  }
}