// Java tutorial
package com.ebay.erl.mobius.core.model; import java.io.DataInput; import java.io.DataOutput; import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.Serializable; import java.sql.Time; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import com.ebay.erl.mobius.core.ConfigureConstants; import com.ebay.erl.mobius.core.collection.CaseInsensitiveTreeMap; import com.ebay.erl.mobius.util.Util; /** * Represents a record(row) in a dataset. * <p> * * <p> * This product is licensed under the Apache License, Version 2.0, * available at http://www.apache.org/licenses/LICENSE-2.0. * * This product contains portions derived from Apache hadoop which is * licensed under the Apache License, Version 2.0, available at * http://hadoop.apache.org. 
* * 2007 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan * */ public class Tuple implements WritableComparable<Tuple>, Cloneable, Configurable, RawComparator<Tuple> { public static void main(String[] arg) throws Throwable { DataOutputStream dos = new DataOutputStream(new FileOutputStream(new File("s:/test.binary"))); //dos.writeUTF("00_DW_LSTG_ITEM"); dos.writeInt(5); dos.flush(); dos.close(); } protected long estimate_size_in_bytes = 8/* object header*/; public static final byte BYTE_TYPE = 0; public static final byte SHORT_TYPE = 1; public static final byte INTEGER_TYPE = 2; public static final byte LONG_TYPE = 3; public static final byte FLOAT_TYPE = 4; public static final byte DOUBLE_TYPE = 5; public static final byte STRING_TYPE = 6; /** * java.sql.Date, only contains yyyy-mm-dd */ public static final byte DATE_TYPE = 7; /** * java.sql.Timestamp type, contains * yyyy-mm-dd hh:mm:ss.[fff] */ public static final byte TIMESTAMP_TYPE = 8; /** * java.sql.Time type, contains hh:mm:ss.[fff] */ public static final byte TIME_TYPE = 9; public static final byte BOOLEAN_TYPE = 10; /** * array of elements, the element types have * to be the one supported by Tuple. */ public static final byte ARRAY_TYPE = 11; /** * representing value generated by a combiner */ public static final byte RESULT_WRAPPER_TYPE = 120; /** * represents org.apache.hadoop.io.NullWritable */ public static final byte NULL_WRITABLE_TYPE = 121; /** * Represents com.ebay.erl.mobius.core.model.Tuple type. */ public static final byte TUPLE_TYPE = 122; /** * Represents byte[]. */ public static final byte BYTE_ARRAY_TYPE = 123; /** * Represents {@link CaseInsensitiveTreeMap} type. */ public static final byte STRING_MAP_TYPE = 124; /** * Represents {@link org.hadoop.hadoop.io.Writable} * type. */ public static final byte WRITABLE_TYPE = 125; /** * Represents {@link java.io.Serializable} */ public static final byte SERIALIZABLE_TYPE = 126; /** * Represents java <code>null</code>. 
*/
    public static final byte NULL_TYPE = 127;

    /** Commons-logging logger for this class. */
    private static final Log LOGGER = LogFactory.getLog(Tuple.class);

    /**
     * A mapping from a column name to the index in the {@link #values}
     * list that holds the value of that column.
     */
    protected Map<String, Integer> namesToIdxMapping = new HashMap<String, Integer>();

    /**
     * Holds the actual values of the columns within this tuple.
     */
    protected List<Object> values = new ArrayList<Object>(1);

    /**
     * Key used for synchronizing update activities to
     * this tuple.
     * <p>
     *
     * NOTE(review): the initializer is a string literal, and string
     * literals are interned, so every Tuple instance ends up locking
     * the same monitor. Confirm whether a per-instance lock was intended.
     */
    protected final String _INSERT_KEY = "insert";

    /**
     * The default delimiter to separate the column
     * values, used in {@link #toString()}; it can
     * be overridden by providing {@link ConfigureConstants#TUPLE_TO_STRING_DELIMITER}.
     */
    private static String _DELIMITER = "\t";

    /**
     * Hadoop configuration object.
     */
    protected Configuration conf;

    /**
     * Whether this tuple accepts modification; starts as <code>true</code>.
     */
    protected boolean isMutable = true;

    /**
     * Cache for quick look up of the lower-case form of a given string, so
     * we don't have to call toLowerCase() every time a value is retrieved
     * from or set to a tuple.
     */
    protected static Map<String/* any string*/, String/*lower case of the key*/> lowerCases = Collections
            .synchronizedMap(new HashMap<String, String>());

    /**
     * Cache from a set of column names to the sorted list of those names.
     */
    protected static Map<Set<String>/*not sorted set*/, List<String>/*sorted keys*/> sortedKeys = Collections
            .synchronizedMap(new HashMap<Set<String>, List<String>>());

    /**
     * An immutable {@link Tuple} which contains only single Column
     * NULL with null value.
*/ public static final Tuple NULL; static { Tuple nullTuple = new Tuple(); nullTuple.putNull("NULL"); NULL = Tuple.immutable(nullTuple); } protected String[] toStringOrdering; protected synchronized static String lowerCase(String key) { String result = lowerCases.get(key); if (result == null) { result = key.toLowerCase(); lowerCases.put(key, result); } return result; } protected synchronized static List<String> getSorted(Set<String> aKeySet) { List<String> sorted = sortedKeys.get(aKeySet); if (sorted == null) { sorted = new ArrayList<String>(); for (String aKey : aKeySet) { sorted.add(lowerCase(aKey)); } Collections.sort(sorted); sortedKeys.put(aKeySet, sorted); } return sorted; } /** * To set the schema of this tuple. * <p> * * This method is called when deserialize * a tuple from disk, and should be called * only in that case. * <p> * * The ordering of the <code>schema</code> is * sorted first then set into this tuple. It * it because when a tuple is being serialize to * disk, the values are extracted according to * the order of their name. So when deserialize * it back, the schema need to be sorted first * and then set. * <p> * * The reason of doing this is because Mobius stores * the schema in hadoop configuration, but the user * might create Tuples with the same schema but in * different ordering (insert the values in the different * order than the defined schema). To solve this * problem, Mobius always serialize the values in * a tuple according to their schema name order, so it * can be deserialized back always in the right schema. * */ public void setSchema(String[] schema) { Arrays.sort(schema); this.namesToIdxMapping.clear(); int idx = 0; for (String aName : schema) { this.namesToIdxMapping.put(lowerCase(aName), idx++); } } /** * Check if the given <code>name</code> exists * in this tuple or not, if so, returns its index. * * @return if the given <code>name</code> is in the * schema of this tuple, the index of the schema * is returned. 
Otherwise, {@link IllegalArgumentException} * is thrown. */ protected int check_in_schema(String name) { Integer idx = null; if ((idx = this.namesToIdxMapping.get(lowerCase(name))) != null) { return idx; } else { throw new IllegalArgumentException("[" + name + "] doesn't exist in this tuple's schema:" + this.namesToIdxMapping.keySet() + ", index:" + this.namesToIdxMapping.values()); } } /** * Get the value of the given column <code>name</code> in * the <code>expecting_type</code>. * <p> * * If the original <code>value</code> is not in the exact * same <code>expecting_type</code>, Mobius will try to * convert it to the <code>expecting_type</code> and return * it. * <p> * * If the original <code>value</code> is null, then * <code>default_value</code> is returned. * * @param expecting_type user specified type for the returned value. * @param name the name of a column within this tuple. * @param value the original value of the column <code>name</code> * @param default_value if the original value is null, then <code>default_value</code> * is returned. * @return */ protected Object get(byte expecting_type, String name, Object value, Object default_value) { byte actual_type = Tuple.getType(value); if (expecting_type == Tuple.getType(value)) { return value; } else { // expecting type and actual type are different. if (Tuple.isNumericalType(expecting_type) && Tuple.isNumericalType(actual_type)) { if (value == null) { return default_value; } // expecting value and actual value are both numerical type, // but not exact the same, perform transformation. 
switch (expecting_type) { case BYTE_TYPE: return ((Number) value).byteValue(); case SHORT_TYPE: return ((Number) value).shortValue(); case INTEGER_TYPE: return ((Number) value).intValue(); case LONG_TYPE: return ((Number) value).longValue(); case FLOAT_TYPE: return ((Number) value).floatValue(); case DOUBLE_TYPE: return ((Number) value).doubleValue(); default: throw new IllegalArgumentException( String.format("%02X", expecting_type) + " is not numerical type."); } } else if (expecting_type == STRING_TYPE && actual_type != STRING_TYPE) { if (value == null) { return default_value; } LOGGER.trace("Accessing column[" + name + "], the expecting type is [" + Tuple.getTypeString(expecting_type) + "], " + "but actual type is [" + Tuple.getTypeString(actual_type) + "], using toString() to get the value."); // expecting type is string, but the actual type is not string, // convert it to string by calling toString(). return value.toString(); } else if (Tuple.isDateType(expecting_type) && Tuple.isDateType(actual_type)) { // date type, but the expecting type is not the same as the actual type. // Ex:, expecting java.sql.Date, but actual is java.sql.Timestamp if (value == null) { return default_value; } // use java.util.Date as the actual type would be // either java.sql.Date, java.sql.Time or // java.sql.Timestamp. 
java.util.Date actual_value = (java.util.Date) value; switch (expecting_type) { case Tuple.DATE_TYPE: java.sql.Date sqlDate = new java.sql.Date(actual_value.getTime()); return sqlDate; case Tuple.TIME_TYPE: java.sql.Time sqlTime = new java.sql.Time(actual_value.getTime()); return sqlTime; case Tuple.TIMESTAMP_TYPE: java.sql.Timestamp sqlTimeStamp = new java.sql.Timestamp(actual_value.getTime()); return sqlTimeStamp; default: throw new IllegalArgumentException(Tuple.getTypeString(actual_type) + " is not a date type."); } } else if (Tuple.isDateType(expecting_type) && actual_type == STRING_TYPE) { // expecting type is date type, but the actual type is string switch (expecting_type) { case Tuple.DATE_TYPE: java.sql.Date sqlDate = java.sql.Date.valueOf((String) value); return sqlDate; case Tuple.TIME_TYPE: java.sql.Time sqlTime = java.sql.Time.valueOf((String) value); return sqlTime; case Tuple.TIMESTAMP_TYPE: java.sql.Timestamp sqlTimeStamp = java.sql.Timestamp.valueOf((String) value); return sqlTimeStamp; default: throw new IllegalArgumentException(Tuple.getTypeString(actual_type) + " is not a date type."); } } else if (Tuple.isNumericalType(expecting_type) && actual_type == STRING_TYPE) { if (value == null) { return default_value; } // expecting type is numerical, but the actual type is string, // try to convert it into numerical value String value_str = (String) value; try { switch (expecting_type) { case BYTE_TYPE: return Byte.parseByte(value_str); case SHORT_TYPE: return Short.parseShort(value_str); case INTEGER_TYPE: return Integer.parseInt(value_str); case LONG_TYPE: return Long.parseLong(value_str); case FLOAT_TYPE: return Float.parseFloat(value_str); case DOUBLE_TYPE: return Double.parseDouble(value_str); default: throw new IllegalArgumentException( String.format("%02X", expecting_type) + " is not numerical type."); } } catch (NumberFormatException e) { throw new NumberFormatException("The value of column[" + name + "] is [" + value_str + "] and cannot be 
converted into " + Tuple.getTypeString(expecting_type)); } } else if (expecting_type == BOOLEAN_TYPE && actual_type == STRING_TYPE) { return Boolean.valueOf((String) value); } throw new ClassCastException("Column [" + name + "] is " + Tuple.getTypeString(actual_type) + ", cannot be converted into " + Tuple.getTypeString(expecting_type)); } } /** * Get the value for column <code>name</code> in the * <code>expecting_type</code>. * * @param expecting_type the type of the returned object. * @param name name of a column in this tuple. * @param default_value if the value of the column is null, then * <code>default_value</code> is returned. * @return the value in the <code>expected_type</code> of column * <code>name</code> */ protected Object get(byte expecting_type, String name, Object default_value) { Object value = this.get(name); return this.get(expecting_type, name, value, default_value); } /** * Get the value of <code>idx</code><sub>th</sub> * column in this tuple. * <p> * * If the value of that column is not double, Mobius will * try to convert it to double if possible, otherwise, * {@link NumberFormatException} will be thrown. * * @param idx index to a column in this tuple, starts from 0. * @param default_value if the value of the column is null, * then <code>default_value</code> is returned. * @return */ public Double getDouble(int idx, double default_value) { Object value = this.get(idx); return (Double) this.get(Tuple.DOUBLE_TYPE, "@index:" + idx, value, default_value); } /** * Get the readable string representation for a given * type. * * @param type type supported by Tuple. * @return a readable string representation of the * <code>type</code>. 
*/ public static String getTypeString(byte type) { TupleTypeHandler<String> converter = new TupleTypeHandler<String>() { @Override protected String on_boolean() throws IOException { return Boolean.class.getCanonicalName(); } @Override protected String on_byte() throws IOException { return Byte.class.getCanonicalName(); } @Override protected String on_byte_array() throws IOException { return "byte[]"; } @Override protected String on_date() throws IOException { return java.sql.Date.class.getCanonicalName(); } @Override protected String on_default() throws IOException { throw new IllegalArgumentException("Unsupported type [" + String.format("0x%02X", type) + "]"); } @Override protected String on_double() throws IOException { return Double.class.getCanonicalName(); } @Override protected String on_float() throws IOException { return Float.class.getCanonicalName(); } @Override protected String on_integer() throws IOException { return Integer.class.getCanonicalName(); } @Override protected String on_long() throws IOException { return Long.class.getCanonicalName(); } @Override protected String on_null() throws IOException { return "Null"; } @Override protected String on_null_writable() throws IOException { return NullWritable.class.getCanonicalName(); } @Override protected String on_serializable() throws IOException { return Serializable.class.getCanonicalName(); } @Override protected String on_short() throws IOException { return Short.class.getCanonicalName(); } @Override protected String on_string() throws IOException { return String.class.getCanonicalName(); } @Override protected String on_string_map() throws IOException { return Map.class.getCanonicalName() + "<String, String>"; } @Override protected String on_time() throws IOException { return Time.class.getCanonicalName(); } @Override protected String on_timestamp() throws IOException { return Timestamp.class.getCanonicalName(); } @Override protected String on_tuple() throws IOException { return 
Tuple.class.getCanonicalName(); } @Override protected String on_writable() throws IOException { return WritableComparable.class.getCanonicalName(); } @Override protected String on_result_wrapper() throws IOException { return ResultWrapper.class.getCanonicalName(); } @Override protected String on_array() throws IOException { return Array.class.getCanonicalName(); } }; try { return converter.handle(type); } catch (IOException e) { throw new RuntimeException(e); } } /** * Get the type of the given <code>obj</code> * in <code>byte</code> format, one of the * supported type in Tuple. */ @SuppressWarnings("unchecked") public static byte getType(Object obj) { if (obj == null) { return NULL_TYPE; } else if (obj instanceof Byte) { return BYTE_TYPE; } else if (obj instanceof Short) { return SHORT_TYPE; } else if (obj instanceof Integer) { return INTEGER_TYPE; } else if (obj instanceof Long) { return LONG_TYPE; } else if (obj instanceof Float) { return FLOAT_TYPE; } else if (obj instanceof Double) { return DOUBLE_TYPE; } else if (obj instanceof String) { return STRING_TYPE; } else if (obj instanceof java.sql.Date) { return DATE_TYPE; } else if (obj instanceof Timestamp) { return TIMESTAMP_TYPE; } else if (obj instanceof Time) { return TIME_TYPE; } else if (obj instanceof Boolean) { return BOOLEAN_TYPE; } else if (obj instanceof Map) { return STRING_MAP_TYPE; } else if (obj instanceof Array) { return ARRAY_TYPE; } else if (obj instanceof NullWritable) { return NULL_WRITABLE_TYPE; } else if (obj instanceof Tuple) { return TUPLE_TYPE; } else if (obj instanceof ResultWrapper) { return RESULT_WRAPPER_TYPE; } else if (obj instanceof Writable) { return WRITABLE_TYPE; } else if (obj instanceof Serializable) { return SERIALIZABLE_TYPE; } else if (obj instanceof byte[]) { return BYTE_ARRAY_TYPE; } else { throw new IllegalArgumentException(obj.getClass().getName() + " is not supported in Tuple."); } } /** * Deserialize the tuple from the input * <code>in</code>. 
*/ @Override public void readFields(DataInput in) throws IOException { if (this.values == null) { this.values = new ArrayList<Object>(); } else this.values.clear(); int columns_nbrs = in.readInt(); ReadFieldImpl read_impl = new ReadFieldImpl(this.values, in, this.conf); for (int i = 0; i < columns_nbrs; i++) { byte type = in.readByte(); read_impl.handle(type); } } /** * Serialize this tuple to the output <code>out</code>. * <p> * * When serialize, the values are stored in the order * of schema name's ordering. See {@link #setSchema(String[])} * for more explanation. */ @Override public void write(DataOutput out) throws IOException { // write the size of the column of this tuple out.writeInt(this.values.size()); if (this.values.size() != this.namesToIdxMapping.size()) { StringBuffer sb = new StringBuffer(); for (Object v : values) sb.append(v.toString()).append(","); throw new IllegalArgumentException(this.getClass().getCanonicalName() + ", the length of values and schmea is not the same, " + "very likely the schema of this tuple has not been set yet, please set it using Tuple#setSchema(String[])." + " Values:[" + sb.toString() + "] schema:" + this.namesToIdxMapping.keySet()); } WriteImpl writeImpl = new WriteImpl(out); for (String aColumnName : getSorted(this.namesToIdxMapping.keySet())) { Object value = this.values.get(this.namesToIdxMapping.get(aColumnName)); byte type = getType(value); out.write(type); writeImpl.setValue(value); writeImpl.handle(type); } } /** * Compare this tuple with <code>other</code>. * <p> * * It calls {@link #compare(Tuple, Tuple)} underline. */ @Override public int compareTo(Tuple other) { return compare(this, other); } /** * Add a new column in the given <code></code> with * provided <code>value</code>. * * @throws UnsupportedOperationException if this tuple is immutable. 
*/ public Tuple insert(String name, Object value) { if (!this.isMutable) { throw new UnsupportedOperationException("This tuple is immutable, cannot be modified."); } TupleColumnName tcn = TupleColumnName.valueOf(lowerCase(name)); String id = tcn.getID(); String mapKey = tcn.getMapKey(); synchronized (this._INSERT_KEY) { if (this.namesToIdxMapping.containsKey(id)) { // do nothing } else { this.namesToIdxMapping.put(id, this.namesToIdxMapping.size()); } int value_idx = this.namesToIdxMapping.get(id); if (value_idx < this.values.size()) { // replace mode, replace the old value if (mapKey == null) { // the <code>name</code> is not map ID style this.values.set(value_idx, value); } else { if (this.values.get(value_idx) instanceof CaseInsensitiveTreeMap) { ((CaseInsensitiveTreeMap) this.values.get(value_idx)).put(mapKey, value.toString()); } else { throw new IllegalArgumentException( "Column [" + id + "] is not " + CaseInsensitiveTreeMap.class.getCanonicalName() + ", " + "cannot change the value using map style ID [" + name + "]"); } } } else if (value_idx == this.values.size()) { // insert mode if (mapKey == null) { // the <code>name</code> is not map ID style this.values.add(value_idx, value); } else { // user tries to use a Map style ID to add new value, disallow throw new IllegalArgumentException("Column [" + id + "] has not been initialized as Map, " + "cannot use [" + name + "] to change the value of the key directly."); } } else { throw new IllegalStateException(); } } return this; } /** * Get Hadoop configuration. */ @Override public Configuration getConf() { return this.conf; } /** * Set the Hadoop configuration, the * delimiter to be used to separated * the column value in the {@link #toString()} * is also set here. * <p> * * The default delimiter is tab, unless * {@link ConfigureConstants.TUPLE_TO_STRING_DELIMITER} * is set by user. 
*/ @Override public void setConf(Configuration conf) { this.conf = conf; synchronized (Tuple._DELIMITER) { if (Tuple._DELIMITER.isEmpty()) { Tuple._DELIMITER = this.conf.get(ConfigureConstants.TUPLE_TO_STRING_DELIMITER, "\t"); } } } private final TupleColumnComparator _COLUMN_COMPARATOR = new TupleColumnComparator(); /** * compare two tuples in low level row format. */ @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { DataInputBuffer d1 = new DataInputBuffer(); d1.reset(b1, s1, l1); DataInputBuffer d2 = new DataInputBuffer(); d2.reset(b2, s2, l2); int _compare_result = Integer.MAX_VALUE; try { // read number of columns from the two tuple int columns_nbr1 = d1.readInt(); int columns_nbr2 = d2.readInt(); int upper_bound = Math.min(columns_nbr1, columns_nbr2); // same column size, start to compare column by column for (int i = 0; i < upper_bound; i++) { byte type1 = d1.readByte(); byte type2 = d2.readByte(); _COLUMN_COMPARATOR.setType(type1, type2); _compare_result = _COLUMN_COMPARATOR.compare(d1, d2, this.conf); // comparing for a column has complete if (_compare_result != 0 && _compare_result != Integer.MAX_VALUE) { // has different, return return _compare_result; } } // end of iterating columns until the upper limit // finished all columns comparison(up to the upper-bound), still cannot find difference, // use the column size as the comparing result. _compare_result = columns_nbr1 - columns_nbr2; } catch (IOException e) { throw new RuntimeException(e); } if (_compare_result == Integer.MAX_VALUE) throw new IllegalArgumentException(); return _compare_result; } /** * Comparing two tuples. * <p> * * It compares the values of the two tuples one * by one in sequence, and as long as there is a * difference between two values, then the * difference is returned. * <p> * * If the number of values in the tuples are * different, the values are compared up to * the boundary of the smaller size tuple. 
If * all the values before the boundary have no * differences, then the smaller size tuple * is considered to be placed before the bigger * size tuple. */ @Override public int compare(Tuple t1, Tuple t2) { int value1_nbr = t1.values.size(); int value2_nbr = t2.values.size(); int upper_bound = Math.min(value1_nbr, value2_nbr); int _compare_result = Integer.MAX_VALUE; try { for (int i = 0; i < upper_bound; i++) { Object v1 = t1.values.get(i); Object v2 = t2.values.get(i); byte type1 = Tuple.getType(v1); byte type2 = Tuple.getType(v2); _COLUMN_COMPARATOR.setType(type1, type2); _compare_result = _COLUMN_COMPARATOR.compare(v1, v2, null); // comparing for a column has complete if (_compare_result != 0 && _compare_result != Integer.MAX_VALUE) { // has different, return return _compare_result; } } // end of iterating columns until the upper limit // finished all columns comparison(up to the upper-bound), still cannot find difference, // use the column size as the comparing result. _compare_result = value1_nbr - value2_nbr; } catch (IOException e) { throw new RuntimeException(e); } if (_compare_result == Integer.MAX_VALUE) throw new IllegalArgumentException(); return _compare_result; // all the same, return 0 } public Tuple put(String name, ResultWrapper<?> v) { if (v == null) throw new NullPointerException("value cannot be null."); this.insert(name, v); return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to <code>null</code>. * * @return return this tuple */ public Tuple putNull(String name) { this.insert(name, null); return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. 
* * @return return this tuple */ public Tuple put(String name, byte value) { this.insert(name, value); // add 16 bytes, 16 bytes is derived from // java.lang.instrument.Instrumentation to test // single Byte (as the value will be auto-boxed // into Byte) on a 64bit VM. this.estimate_size_in_bytes += 16; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, byte[] value) { this.insert(name, value); // add the length of the value plus 16 bytes // base. this.estimate_size_in_bytes += (value.length + 16); return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, short value) { this.insert(name, value); // add 16 bytes, 16 bytes is derived from // java.lang.instrument.Instrumentation to test // single Short (as the value will be auto-boxed // into Short) on a 64bit VM. this.estimate_size_in_bytes += 16; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, int value) { this.insert(name, value); // add 16 bytes, 16 bytes is derived from // java.lang.instrument.Instrumentation to test // Integer.MAX_VALUE (as the value will be auto-boxed // into Integer) on a 64bit VM. this.estimate_size_in_bytes += 16; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. 
* * @return return this tuple */ public Tuple put(String name, long value) { this.insert(name, value); // add 24 bytes, 24 bytes is derived from // java.lang.instrument.Instrumentation to test // Long.MAX_VALUE (as the value will be auto-boxed // into Long) on a 64bit VM. this.estimate_size_in_bytes += 24; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, float value) { this.insert(name, value); // add 16 bytes, 16 bytes is derived from // java.lang.instrument.Instrumentation to test // Float.MAX_VALUE (as the value will be auto-boxed // into Float) on a 64bit VM. this.estimate_size_in_bytes += 16; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, double value) { this.insert(name, value); // add 24 bytes, 24 bytes is derived from // java.lang.instrument.Instrumentation to test // Double.MAX_VALUE (as the value will be auto-boxed // into Double) on a 64bit VM. this.estimate_size_in_bytes += 24; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, boolean value) { this.insert(name, value); // add 16 bytes for inserting a boolean, // it will be auto-box into Boolean, and // using java.lang.instrument.Instrumentation // to test single Boolean on a 64bit VM require // 16 bytes. this.estimate_size_in_bytes += 16; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. 
* * @return return this tuple */ public Tuple put(String name, java.sql.Date value) { if (value == null) throw new NullPointerException("value cannot be null."); this.insert(name, value); // add 24 bytes, 24 bytes is derived from // java.lang.instrument.Instrumentation to test // single java.sql.Date on a 64bit VM. this.estimate_size_in_bytes += 24; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, Timestamp value) { if (value == null) throw new NullPointerException("value cannot be null."); this.insert(name, value); // add 32 bytes, 32 bytes is derived from // java.lang.instrument.Instrumentation to test // single java.sql.Timestamp on a 64bit VM. this.estimate_size_in_bytes += 32; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, Time value) { if (value == null) throw new NullPointerException("value cannot be null."); this.insert(name, value); // add 24 bytes, 24 bytes is derived from // java.lang.instrument.Instrumentation to test // single java.sql.Time on a 64bit VM. this.estimate_size_in_bytes += 24; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. 
* * @return return this tuple */ public Tuple put(String name, String value) { if (value == null) throw new NullPointerException("value cannot be null."); this.insert(name, value); // reference: http://www.javamex.com/tutorials/memory/string_memory_usage.shtml // Minimum String memory usage (bytes) = 8 * (int) ((((no chars) * 2) + 45) / 8) this.estimate_size_in_bytes += 8 * (int) ((((value.length()) * 2) + 45) / 8); return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, CaseInsensitiveTreeMap value) { // insure the value is case-insensitive TreeMap this.insert(name, value); // reference: http://www.javamex.com/tutorials/memory/string_memory_usage.shtml // Minimum String memory usage (bytes) = 8 * (int) ((((no chars) * 2) + 45) / 8) long est_key_size = 8 * (int) ((((64/*assume 64 chars string*/) * 2) + 45) / 8); this.estimate_size_in_bytes += 48 /*map overhead*/ + est_key_size * 2/*assume key and value is about the same size*/; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple */ public Tuple put(String name, Writable value) { if (value == null) throw new NullPointerException("value cannot be null."); this.insert(name, value); // estimate only, put 512 byte here this.estimate_size_in_bytes += 512; return this; } /** * For the given column named <code>name</code>, * add (if it doesn't exist) to this tuple or update (if it * exists) its value to the given <code>value</code>. * * @return return this tuple * @throws IllegalArgumentException if <code>value</code> is instance * of Map but not {@link CaseInsensitiveTreeMap}. 
Or when <code>value</code> * doesn't implement {@link Comparable} */ public Tuple put(String name, Serializable value) { if (value == null) throw new NullPointerException("value cannot be null."); if (value instanceof Map<?, ?> && !(value instanceof CaseInsensitiveTreeMap)) { throw new IllegalArgumentException( "The supported map type is only " + CaseInsensitiveTreeMap.class.getCanonicalName()); } if (value instanceof Comparable<?>) { this.insert(name, value); // estimate only, put 512 byte here this.estimate_size_in_bytes += 512; return this; } else { throw new IllegalArgumentException(value.getClass().getCanonicalName() + " doesn't implement " + Comparable.class.getCanonicalName()); } } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, -1 is returned. * <p> * * If the value is not <code>null</code>, but it's * not a short type, Mobius will try to convert it * to short, otherwise, {@link NumberFormatException} * is thrown. */ public Short getShort(String name) { return this.getShort(name, (short) -1); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a short type, Mobius will try to convert it * to short, otherwise, {@link NumberFormatException} * is thrown. */ public Short getShort(String name, short default_value) { return (Short) get(SHORT_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, -1 is returned. * <p> * * If the value is not <code>null</code>, but it's * not a short type, Mobius will try to convert it * to short, otherwise, {@link NumberFormatException} * is thrown. */ public Integer getInt(String name) { return getInt(name, -1); } /** * Get the value of column named <code>name</code>. 
* <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not an integer type, Mobius will try to convert it * to integer, otherwise, {@link NumberFormatException} * is thrown. */ public Integer getInt(String name, int default_value) { return (Integer) get(INTEGER_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, -1L is returned. * <p> * * If the value is not <code>null</code>, but it's * not a long type, Mobius will try to convert it * to long, otherwise, {@link NumberFormatException} * is thrown. */ public Long getLong(String name) { return (Long) get(LONG_TYPE, name, -1L); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a long type, Mobius will try to convert it * to long, otherwise, {@link NumberFormatException} * is thrown. */ public Long getLong(String name, long default_value) { return (Long) get(LONG_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, -1F is returned. * <p> * * If the value is not <code>null</code>, but it's * not a float type, Mobius will try to convert it * to float, otherwise, {@link NumberFormatException} * is thrown. */ public Float getFloat(String name) { return (Float) get(FLOAT_TYPE, name, -1F); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a float type, Mobius will try to convert it * to float, otherwise, {@link NumberFormatException} * is thrown. 
*/
    public Float getFloat(String name, float default_value) {
        return (Float) get(FLOAT_TYPE, name, default_value);
    }

    /**
     * Get the value of column named <code>name</code>.
     * <p>
     *
     * If the value is <code>null</code>, -1D is returned.
     * <p>
     *
     * If the value is not <code>null</code>, but it's
     * not a double type, Mobius will try to convert it
     * to double, otherwise, {@link NumberFormatException}
     * is thrown.
     *
     * @param name the column name
     * @return the column value as a double
     */
    public Double getDouble(String name) {
        return (Double) get(DOUBLE_TYPE, name, -1D);
    }

    /**
     * Get the value of column named <code>name</code>.
     * <p>
     *
     * If the value is <code>null</code>, <code>default_value</code>
     * is returned.
     * <p>
     *
     * If the value is not <code>null</code>, but it's
     * not a double type, Mobius will try to convert it
     * to double, otherwise, {@link NumberFormatException}
     * is thrown.
     *
     * @param name          the column name
     * @param default_value the value used when the column is <code>null</code>
     * @return the column value as a double
     */
    public Double getDouble(String name, double default_value) {
        return (Double) get(DOUBLE_TYPE, name, default_value);
    }

    /**
     * Get the value of column named <code>name</code>.
     * <p>
     *
     * If the value is <code>null</code>, 0x00 is returned.
     * <p>
     *
     * If the value is not <code>null</code>, but it's
     * not a byte type, Mobius will try to convert it
     * to byte, otherwise, {@link NumberFormatException}
     * is thrown.
     *
     * @param name the column name
     * @return the column value as a byte
     */
    public Byte getByte(String name) {
        // The default must be a byte: a bare 0x00 literal is an int and
        // would be boxed to Integer, which cannot be cast to Byte.
        return (Byte) get(BYTE_TYPE, name, (byte) 0x00);
    }

    /**
     * Get the value of column named <code>name</code>.
     * <p>
     *
     * If the value is <code>null</code>, <code>default_value</code>
     * is returned.
     * <p>
     *
     * If the value is not <code>null</code>, but it's
     * not a byte type, Mobius will try to convert it
     * to byte, otherwise, {@link NumberFormatException}
     * is thrown.
     *
     * @param name          the column name
     * @param default_value the value used when the column is <code>null</code>
     * @return the column value as a byte
     */
    public Byte getByte(String name, byte default_value) {
        return (Byte) get(BYTE_TYPE, name, default_value);
    }

    /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>false</code> * is returned.
* <p> * * If the value is not <code>null</code>, but it's * not a boolean type, Mobius will try to convert it * to using {@link Boolean#valueOf(String)}. */ public Boolean getBoolean(String name) { return (Boolean) get(BOOLEAN_TYPE, name, false); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a boolean type, Mobius will try to convert it * to using {@link Boolean#valueOf(String)}. */ public Boolean getBoolean(String name, boolean default_value) { return (Boolean) get(BOOLEAN_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>null</code> * is still returned. * <p> * * If the value is not <code>null</code>, but it's * not a string type, Mobius will try to convert it * to using the <code>toString()</code> method. */ public String getString(String name) { return (String) get(STRING_TYPE, name, null); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a string type, Mobius will try to convert it * to using the <code>toString()</code> method. */ public String getString(String name, String default_value) { return (String) get(STRING_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>null</code> * is still returned. * <p> * * If the value is not <code>null</code>, but it's * not a map type, {@link IllegalArgumentException} * is thrown. */ @SuppressWarnings("unchecked") public Map<String, String> getMap(String name) { return (Map<String, String>) get(STRING_MAP_TYPE, name, null); } /** * Get the value of column named <code>name</code>. 
* <p> * * If the value is <code>null</code>, <code>null</code> * is still returned. * <p> * * If the value is not <code>null</code>, but it's * not a date type, Mobius will try to convert it * to using the {@link java.sql.Date#valueOf(String)} * method. */ public java.sql.Date getDate(String name) { return (java.sql.Date) get(DATE_TYPE, name, null); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a date type, Mobius will try to convert it * to using the {@link java.sql.Date#valueOf(String)} * method. */ public java.sql.Date getDate(String name, java.sql.Date default_value) { return (java.sql.Date) get(DATE_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>null</code> * is still returned. * <p> * * If the value is not <code>null</code>, but it's * not a date type, Mobius will try to convert it * to using the {@link java.sql.Timestamp#valueOf(String)} * method. */ public Timestamp getTimestamp(String name) { return (Timestamp) get(TIMESTAMP_TYPE, name, null); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a date type, Mobius will try to convert it * to using the {@link java.sql.Timestamp#valueOf(String)} * method. */ public Timestamp getTimestamp(String name, java.sql.Timestamp default_value) { return (Timestamp) get(TIMESTAMP_TYPE, name, default_value); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>null</code> * is still returned. * <p> * * If the value is not <code>null</code>, but it's * not a date type, Mobius will try to convert it * to using the {@link java.sql.Time#valueOf(String)} * method. 
*/ public Time getTime(String name) { return (Time) get(TIME_TYPE, name, null); } /** * Get the value of column named <code>name</code>. * <p> * * If the value is <code>null</code>, <code>default_value</code> * is returned. * <p> * * If the value is not <code>null</code>, but it's * not a date type, Mobius will try to convert it * to using the {@link java.sql.Time#valueOf(String)} * method. */ public Time getTime(String name, Time default_value) { return (Time) get(TIME_TYPE, name, default_value); } /** * Get value directly using index. */ public Object get(int index) { return this.values.get(index); } /** * Get the value of column named <code>name</code>. * * @param name the name of a column. * @return value of the column. */ public Object get(String name) { TupleColumnName tcn = TupleColumnName.valueOf(lowerCase(name)); int idx = check_in_schema(tcn.getID()); if (tcn.getMapKey() == null) { // not using map style name to access the value return this.values.get(idx); } else { // user is referencing to a value of a map key. Object value = this.values.get(idx); if (value instanceof CaseInsensitiveTreeMap) { return ((CaseInsensitiveTreeMap) value).get(tcn.getMapKey()); } else { throw new IllegalArgumentException("The type of column [" + tcn.getID() + "] is not " + CaseInsensitiveTreeMap.class.getCanonicalName() + " but " + value.getClass().getCanonicalName() + ", the given ID [" + name + "] is a map style ID and cannot be applied to this column."); } } } /** * Return a new instance of {@link Tuple} which * contains the exact same data of this one. 
*/ @Override public Tuple clone() { Tuple clone = new Tuple(); clone.namesToIdxMapping = new HashMap<String, Integer>(); clone.values = new ArrayList<Object>(this.values.size()); // fulfill the values with null for (int i = 0; i < this.values.size(); i++) { clone.values.add(null); } for (String columnName : this.namesToIdxMapping.keySet()) { Integer idx = this.namesToIdxMapping.get(columnName); clone.namesToIdxMapping.put(columnName, idx); clone.values.set(idx, this.get(idx)); } return clone; } @Override public int hashCode() { int hashCode = 0; for (Object obj : this.values) { //if( obj==null ) // throw new RuntimeException(this.namesToIdxMapping.toString()+":"+this.values.toString()); if (obj == null) continue; hashCode += obj.hashCode(); } return hashCode; } /** * Test if the given <code>type</code> is * {@link #BYTE_TYPE}, {@link #SHORT_TYPE}, * {@link #INTEGER_TYPE}, {@link #LONG_TYPE}, * {@link #FLOAT_TYPE}, or {@link #DOUBLE_TYPE}. * <p> * * Return <code>true</code> if the <code>type</code> * is within the above types, false otherwise. */ public static boolean isNumericalType(byte type) { return type >= Tuple.BYTE_TYPE && type <= Tuple.DOUBLE_TYPE; } /** * Test if the given <code>type</code> is * {@link #TIME_TYPE}, {@link #DATE_TYPE}, or * {@link #TIMESTAMP_TYPE}. * <p> * * Return <code>true</code> if the <code>type</code> * is within the above types, false otherwise. */ public static boolean isDateType(byte type) { return type == Tuple.DATE_TYPE || type == Tuple.TIMESTAMP_TYPE || type == Tuple.TIME_TYPE; } private void setMutable(boolean isMutable) { this.isMutable = isMutable; } /** * return a new instance of tuple that contains the * same data of the given <code>t</code> tuple, but * reject all modification requests, such as * {@link Tuple#insert(String, Object)}. * <p> * * Note that, this method return a new instance, the * original <code>t</code> tuple is still a mutable * {@linkplain Tuple}. 
*/
    public static Tuple immutable(Tuple t) {
        Tuple clone = t.clone();
        clone.setMutable(false);
        return clone;
    }

    /**
     * Merge the tuples together, and return a new
     * tuple represents the merged result.
     * <p>
     *
     * All the columns in <code>t1</code> and
     * <code>t2</code> will be put together into
     * the returned Tuple. If there are columns
     * in <code>t2</code> also appear in <code>t1</code>,
     * then values from <code>t2</code> of those columns
     * will be used instead of the values in <code>t1</code>.
     * <p>
     *
     * A <code>null</code> argument is skipped.
     *
     * @param t1 the first tuple, may be <code>null</code>
     * @param t2 the second tuple, may be <code>null</code>
     * @return a new tuple with the columns of both arguments
     */
    public static Tuple merge(Tuple t1, Tuple t2) {
        Tuple result = new Tuple();

        if (t1 != null) {
            for (String aColumn : t1.getSchema()) {
                result.insert(aColumn, t1.get(aColumn));
            }
        }

        // t2 goes last, so its values are the ones inserted last for shared columns
        if (t2 != null) {
            for (String aColumn : t2.getSchema()) {
                result.insert(aColumn, t2.get(aColumn));
            }
        }

        return result;
    }

    /**
     * Convert this {@link Tuple} into text, the delimiter is specified by
     * "mobius.tuple.tostring.delimiter" (default is tab).
     * <p>
     *
     * When a to-string ordering has been set, only the columns in that
     * ordering are written, in that order; otherwise all values are
     * written in index order. A <code>null</code> value is written as
     * an empty string. The delimiter is placed between values only.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        if (this.toStringOrdering != null && this.toStringOrdering.length > 0) {
            // The delimiter test uses the number of columns actually
            // written, not the number of values held by the tuple.
            int last = this.toStringOrdering.length - 1;
            for (int i = 0; i <= last; i++) {
                Object aValue = this.get(this.toStringOrdering[i]);
                if (aValue != null) {
                    sb.append(aValue.toString());
                }
                if (i < last) {
                    sb.append(Tuple._DELIMITER);
                }
            }
        } else {
            int last = this.values.size() - 1;
            for (int i = 0; i <= last; i++) {
                Object aValue = this.values.get(i);
                if (aValue != null) {
                    sb.append(aValue.toString());
                }
                if (i < last) {
                    sb.append(Tuple._DELIMITER);
                }
            }
        }

        return sb.toString();
    }

    /** * Compare if the <code>obj</code> equals to * this tuple or not. * <p> * * Equals only whe the class of this tuple and the * <code>obj</code> is the same, both share same * schema, and the values of the columns are the same.
*/ @Override public boolean equals(Object obj) { if (obj == this) return true; if (obj.getClass().equals(this.getClass())) { Tuple that = (Tuple) obj; if (this.namesToIdxMapping.keySet().equals(that.namesToIdxMapping.keySet())) { // same schema, test the value one by one for (String name : this.namesToIdxMapping.keySet()) { Object v1 = this.get(name); Object v2 = that.get(name); if (v1 == null && v2 == null) { // both null, consider equal, move // on to the next } else if (v1 == null && v2 != null) { return false; } else if (v1 != null && v2 == null) { return false; } else if (!this.get(name).equals(that.get(name))) { // both are not equals return false; } } return true; } else { return false; } } return false; } /** * Return the schema of this column. * <p> */ public String[] getSchema() { try { String[] schema = new String[this.namesToIdxMapping.size()]; for (String aColumnName : this.namesToIdxMapping.keySet()) { int idx = this.namesToIdxMapping.get(aColumnName);// the index of this column schema[idx] = aColumnName; } return schema; } catch (NullPointerException e) { throw e; } } /** * Convert the <code>source</code> into a tuple. * <p> * * Split the <code>source</code> with the given <code>delimiter</code>, * and use them as the values to the returned tuple, then set the * schema to the tuple. * <p> * * The ordering of the schema shall be the same as the ordering of the * values from the splitted <code>source</code>. * <p> * * If the number of values in the splitted <code>source<code> is greater * than the length of <code>schema</code>, <code>IDX_$i</code> is used * as the name of those value, where <code>$i</code> starts from the * length of <code>schema</code>. 
*/
    public static Tuple valueOf(Text source, String[] schema, String delimiter) {
        final Tuple parsed = new Tuple();
        final List<String> tokens = Util.nonRegexSplit(source.toString(), delimiter);

        final int total = Math.max(schema.length, tokens.size());
        for (int i = 0; i < total; i++) {
            if (i >= schema.length) {
                // extra value beyond the user specified schema, put in the tail
                parsed.put("IDX_" + i, tokens.get(i));
            } else if (i < tokens.size()) {
                parsed.put(schema[i], tokens.get(i));
            } else {
                // the schema has more columns than the source has values
                parsed.putNull(schema[i]);
            }
        }
        return parsed;
    }

    /**
     * Returns the running estimate, in bytes, of the memory taken
     * by this tuple.
     * <p>
     *
     * The estimate is based on a 64bit VM.
     */
    public long getEstimatedSizeInMemory() {
        return this.estimate_size_in_bytes;
    }

    /**
     * Sets the columns, and their order, used by {@link #toString()}.
     */
    public void setToStringOrdering(String[] columns) {
        this.toStringOrdering = columns;
    }

    /**
     * Returns <code>true</code> when this tuple has at least one
     * named column.
     */
    public boolean hasSchema() {
        if (this.namesToIdxMapping == null) {
            return false;
        }
        return !this.namesToIdxMapping.keySet().isEmpty();
    }

    /**
     * The parsed form of a tuple column name.
     * <p>
     *
     * The format of a column name is given, as a regular
     * expression, by {@link TupleColumnName#COLUMN_NAME_PATTERN}.
     */
    public static final class TupleColumnName {
        /**
         * Format of a tuple column name.
         * <p>
         * The format is: <code>([\\p{Graph}&&[^\\.]]+)(\\.([\\p{Graph}&&[^\\.]]+))?</code>
         * <p>
         */
        public static final Pattern COLUMN_NAME_PATTERN = Pattern
                .compile("([\\p{Graph}&&[^\\.]]+)(\\.([\\p{Graph}&&[^\\.]]+))?");

        /**
         * Required, the ID of the column.
         */
        private String id;

        /**
         * Optional, used for Map type columns only. For example,
         * A.B refers to a column named "A" which is a Map, and
         * "B" is the key whose value is being accessed.
         */
        private String mapKey;

        /** Already parsed names, keyed by the raw column name. */
        private static Map<String, TupleColumnName> tupleColumnNames = new HashMap<String, TupleColumnName>();

        /**
         * Parses <code>columnName</code> into a {@link TupleColumnName}.
         * <p>
         *
         * The text before the first dot becomes the ID and the text
         * after it becomes the map key; a name without a dot has no
         * map key. Parsed names are kept and returned again for the
         * same input.
         *
         * @param columnName the raw column name
         * @return the parsed column name
         * @throws IllegalArgumentException if <code>columnName</code> is
         *         <code>null</code>, blank, or its first dot is the first
         *         or the last character.
         */
        public static synchronized TupleColumnName valueOf(String columnName) {
            if (columnName == null || columnName.trim().isEmpty()) {
                throw new IllegalArgumentException("column name cannot be null nor empty string.");
            }

            final TupleColumnName known = tupleColumnNames.get(columnName);
            if (known != null) {
                return known;
            }

            final int dotIdx = columnName.indexOf(".");
            final boolean startsWithDot = dotIdx == 0;
            final boolean endsAtFirstDot = dotIdx > 0 && dotIdx + 1 == columnName.length();
            if (startsWithDot || endsAtFirstDot) {
                throw new IllegalArgumentException("Invalid format of Tuple column name:[" + columnName
                        + "], please refer the correct format in {@link Tuple#COLUMN_NAME_PATTERN}");
            }

            final TupleColumnName parsed = new TupleColumnName();
            if (dotIdx < 0) {
                parsed.id = columnName;
                parsed.mapKey = null;
            } else {
                parsed.id = columnName.substring(0, dotIdx);
                parsed.mapKey = columnName.substring(dotIdx + 1);
            }

            tupleColumnNames.put(columnName, parsed);
            return parsed;
        }

        /**
         * Returns the ID of the column.
         */
        public String getID() {
            return this.id;
        }

        /**
         * Returns the map key of the column name, or <code>null</code>
         * when the name has none. For a name such as
         * <code>ID.MAP_KEY</code>, <code>ID</code> names a map column
         * and <code>MAP_KEY</code> is the key used to get the value.
         */
        public String getMapKey() {
            return this.mapKey;
        }
    }
}