Java tutorial
package org.apache.hawq.pxf.service.io; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import org.apache.hawq.pxf.api.io.DataType; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import java.io.*; import java.util.Arrays; import static org.apache.hawq.pxf.api.io.DataType.*; /** * This class represents a GPDB record in the form of * a Java object. */ public class GPDBWritable implements Writable { /* * GPDBWritable is using the following serialization form: * Total Length | Version | Error Flag | # of columns | Col type |...| Col type | Null Bit array | Col val... * 4 byte | 2 byte | 1 byte | 2 byte | 1 byte |...| 1 byte | ceil(# of columns/8) byte | Fixed or Var length * * For fixed length type, we know the length. * In the col val, we align pad according to the alignment requirement of the type. * For var length type, the alignment is always 4 byte. * For var length type, col val is <4 byte length><payload val> */ private static final Log LOG = LogFactory.getLog(GPDBWritable.class); private static final int EOF = -1; /* * Enum of the Database type */ private enum DBType { BIGINT(8, 8), BOOLEAN(1, 1), FLOAT8(8, 8), INTEGER(4, 4), REAL(4, 4), SMALLINT(2, 2), BYTEA(4, -1), TEXT(4, -1); private final int typelength; // -1 means var length private final int alignment; DBType(int align, int len) { this.typelength = len; this.alignment = align; } public int getTypeLength() { return typelength; } public boolean isVarLength() { return typelength == -1; } // return the alignment requirement of the type public int getAlignment() { return alignment; } } /* * Constants */ private static final int PREV_VERSION = 1; private static final int VERSION = 2; /* for backward compatibility */ private static final String CHARSET = "UTF-8"; /* * Local variables */ protected int[] colType; protected Object[] colValue; protected int alignmentOfEightBytes = 8; protected byte errorFlag = 0; protected int pktlen = EOF; public int[] getColType() { return colType; } /** * An exception class for column type definition and * set/get value mismatch. */ public class TypeMismatchException extends IOException { public TypeMismatchException(String msg) { super(msg); } } /** * Empty Constructor */ public GPDBWritable() { initializeEightByteAlignment(); } /** * Constructor to build a db record. colType defines the schema * * @param columnType the table column types */ public GPDBWritable(int[] columnType) { initializeEightByteAlignment(); colType = columnType; colValue = new Object[columnType.length]; } /** * Constructor to build a db record from a serialized form. * * @param data a record in the serialized form * @throws IOException if the data is malformatted. */ public GPDBWritable(byte[] data) throws IOException { initializeEightByteAlignment(); ByteArrayInputStream bis = new ByteArrayInputStream(data); DataInputStream dis = new DataInputStream(bis); readFields(dis); } /* * Read first 4 bytes, and verify it's a valid packet length. * Upon error returns EOF. */ private int readPktLen(DataInput in) throws IOException { pktlen = EOF; try { pktlen = in.readInt(); } catch (EOFException e) { LOG.debug("Reached end of stream (EOFException)"); return EOF; } if (pktlen == EOF) { LOG.debug("Reached end of stream (returned -1)"); } return pktlen; } @Override public void readFields(DataInput in) throws IOException { /* * extract pkt len. * * GPSQL-1107: * The DataInput might already be empty (EOF), but we can't check it beforehand. * If that's the case, pktlen is updated to -1, to mark that the object is still empty. * (can be checked with isEmpty()). */ pktlen = readPktLen(in); if (isEmpty()) { return; } /* extract the version and col cnt */ int version = in.readShort(); int curOffset = 4 + 2; int colCnt; /* !!! Check VERSION !!! */ if (version != GPDBWritable.VERSION && version != GPDBWritable.PREV_VERSION) { throw new IOException("Current GPDBWritable version(" + GPDBWritable.VERSION + ") does not match input version(" + version + ")"); } if (version == GPDBWritable.VERSION) { errorFlag = in.readByte(); curOffset += 1; } colCnt = in.readShort(); curOffset += 2; /* Extract Column Type */ colType = new int[colCnt]; DBType[] coldbtype = new DBType[colCnt]; for (int i = 0; i < colCnt; i++) { int enumType = (in.readByte()); curOffset += 1; if (enumType == DBType.BIGINT.ordinal()) { colType[i] = BIGINT.getOID(); coldbtype[i] = DBType.BIGINT; } else if (enumType == DBType.BOOLEAN.ordinal()) { colType[i] = BOOLEAN.getOID(); coldbtype[i] = DBType.BOOLEAN; } else if (enumType == DBType.FLOAT8.ordinal()) { colType[i] = FLOAT8.getOID(); coldbtype[i] = DBType.FLOAT8; } else if (enumType == DBType.INTEGER.ordinal()) { colType[i] = INTEGER.getOID(); coldbtype[i] = DBType.INTEGER; } else if (enumType == DBType.REAL.ordinal()) { colType[i] = REAL.getOID(); coldbtype[i] = DBType.REAL; } else if (enumType == DBType.SMALLINT.ordinal()) { colType[i] = SMALLINT.getOID(); coldbtype[i] = DBType.SMALLINT; } else if (enumType == DBType.BYTEA.ordinal()) { colType[i] = BYTEA.getOID(); coldbtype[i] = DBType.BYTEA; } else if (enumType == DBType.TEXT.ordinal()) { colType[i] = TEXT.getOID(); coldbtype[i] = DBType.TEXT; } else { throw new IOException("Unknown GPDBWritable.DBType ordinal value"); } } /* Extract null bit array */ byte[] nullbytes = new byte[getNullByteArraySize(colCnt)]; in.readFully(nullbytes); curOffset += nullbytes.length; boolean[] colIsNull = byteArrayToBooleanArray(nullbytes, colCnt); /* extract column value */ colValue = new Object[colCnt]; for (int i = 0; i < colCnt; i++) { if (!colIsNull[i]) { /* Skip the alignment padding */ int skipbytes = roundUpAlignment(curOffset, coldbtype[i].getAlignment()) - curOffset; for (int j = 0; j < skipbytes; j++) { in.readByte(); } curOffset += skipbytes; /* For fixed length type, increment the offset according to type type length here. * For var length type (BYTEA, TEXT), we'll read 4 byte length header and the * actual payload. */ int varcollen = -1; if (coldbtype[i].isVarLength()) { varcollen = in.readInt(); curOffset += 4 + varcollen; } else { curOffset += coldbtype[i].getTypeLength(); } switch (DataType.get(colType[i])) { case BIGINT: { colValue[i] = in.readLong(); break; } case BOOLEAN: { colValue[i] = in.readBoolean(); break; } case FLOAT8: { colValue[i] = in.readDouble(); break; } case INTEGER: { colValue[i] = in.readInt(); break; } case REAL: { colValue[i] = in.readFloat(); break; } case SMALLINT: { colValue[i] = in.readShort(); break; } /* For BYTEA column, it has a 4 byte var length header. */ case BYTEA: { colValue[i] = new byte[varcollen]; in.readFully((byte[]) colValue[i]); break; } /* For text formatted column, it has a 4 byte var length header * and it's always null terminated string. * So, we can remove the last "\0" when constructing the string. */ case TEXT: { byte[] data = new byte[varcollen]; in.readFully(data, 0, varcollen); colValue[i] = new String(data, 0, varcollen - 1, CHARSET); break; } default: throw new IOException("Unknown GPDBWritable ColType"); } } } /* Skip the ending alignment padding */ int skipbytes = roundUpAlignment(curOffset, 8) - curOffset; for (int j = 0; j < skipbytes; j++) { in.readByte(); } curOffset += skipbytes; if (errorFlag != 0) { throw new IOException("Received error value " + errorFlag + " from format"); } } @Override public void write(DataOutput out) throws IOException { int numCol = colType.length; boolean[] nullBits = new boolean[numCol]; int[] colLength = new int[numCol]; byte[] enumType = new byte[numCol]; int[] padLength = new int[numCol]; byte[] padbytes = new byte[8]; /** * Compute the total payload and header length * header = total length (4 byte), Version (2 byte), Error (1 byte), #col (2 byte) * col type array = #col * 1 byte * null bit array = ceil(#col/8) */ int datlen = 4 + 2 + 1 + 2; datlen += numCol; datlen += getNullByteArraySize(numCol); for (int i = 0; i < numCol; i++) { /* Get the enum type */ DBType coldbtype; switch (DataType.get(colType[i])) { case BIGINT: coldbtype = DBType.BIGINT; break; case BOOLEAN: coldbtype = DBType.BOOLEAN; break; case FLOAT8: coldbtype = DBType.FLOAT8; break; case INTEGER: coldbtype = DBType.INTEGER; break; case REAL: coldbtype = DBType.REAL; break; case SMALLINT: coldbtype = DBType.SMALLINT; break; case BYTEA: coldbtype = DBType.BYTEA; break; default: coldbtype = DBType.TEXT; } enumType[i] = (byte) (coldbtype.ordinal()); /* Get the actual value, and set the null bit */ if (colValue[i] == null) { nullBits[i] = true; colLength[i] = 0; } else { nullBits[i] = false; /* * For fixed length type, we get the fixed length. * For var len binary format, the length is in the col value. * For text format, we must convert encoding first. */ if (!coldbtype.isVarLength()) { colLength[i] = coldbtype.getTypeLength(); } else if (!isTextForm(colType[i])) { colLength[i] = ((byte[]) colValue[i]).length; } else { colLength[i] = ((String) colValue[i]).getBytes(CHARSET).length; } /* calculate and add the type alignment padding */ padLength[i] = roundUpAlignment(datlen, coldbtype.getAlignment()) - datlen; datlen += padLength[i]; /* for variable length type, we add a 4 byte length header */ if (coldbtype.isVarLength()) { datlen += 4; } } datlen += colLength[i]; } /* * Add the final alignment padding for the next record */ int endpadding = roundUpAlignment(datlen, 8) - datlen; datlen += endpadding; /* Construct the packet header */ out.writeInt(datlen); out.writeShort(VERSION); out.writeByte(errorFlag); out.writeShort(numCol); /* Write col type */ for (int i = 0; i < numCol; i++) { out.writeByte(enumType[i]); } /* Nullness */ byte[] nullBytes = boolArrayToByteArray(nullBits); out.write(nullBytes); /* Column Value */ for (int i = 0; i < numCol; i++) { if (!nullBits[i]) { /* Pad the alignment byte first */ if (padLength[i] > 0) { out.write(padbytes, 0, padLength[i]); } /* Now, write the actual column value */ switch (DataType.get(colType[i])) { case BIGINT: out.writeLong(((Long) colValue[i])); break; case BOOLEAN: out.writeBoolean(((Boolean) colValue[i])); break; case FLOAT8: out.writeDouble(((Double) colValue[i])); break; case INTEGER: out.writeInt(((Integer) colValue[i])); break; case REAL: out.writeFloat(((Float) colValue[i])); break; case SMALLINT: out.writeShort(((Short) colValue[i])); break; /* For BYTEA format, add 4byte length header at the beginning */ case BYTEA: out.writeInt(colLength[i]); out.write((byte[]) colValue[i]); break; /* For text format, add 4byte length header. string is already '\0' terminated */ default: { out.writeInt(colLength[i]); byte[] data = ((String) colValue[i]).getBytes(CHARSET); out.write(data); break; } } } } /* End padding */ out.write(padbytes, 0, endpadding); } /** * Private helper to convert boolean array to byte array */ private static byte[] boolArrayToByteArray(boolean[] data) { int len = data.length; byte[] byts = new byte[getNullByteArraySize(len)]; for (int i = 0, j = 0, k = 7; i < data.length; i++) { byts[j] |= (data[i] ? 1 : 0) << k--; if (k < 0) { j++; k = 7; } } return byts; } /** * Private helper to determine the size of the null byte array */ private static int getNullByteArraySize(int colCnt) { return (colCnt / 8) + (colCnt % 8 != 0 ? 1 : 0); } /** * Private helper to convert byte array to boolean array */ private static boolean[] byteArrayToBooleanArray(byte[] data, int colCnt) { boolean[] bools = new boolean[colCnt]; for (int i = 0, j = 0, k = 7; i < bools.length; i++) { bools[i] = ((data[j] >> k--) & 0x01) == 1; if (k < 0) { j++; k = 7; } } return bools; } /** * Private helper to round up alignment for the given length */ private int roundUpAlignment(int len, int align) { int commonAlignment = align; if (commonAlignment == 8) { commonAlignment = alignmentOfEightBytes; } return (((len) + ((commonAlignment) - 1)) & ~((commonAlignment) - 1)); } /** * Getter/Setter methods to get/set the column value */ /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setLong(int colIdx, Long val) throws TypeMismatchException { checkType(BIGINT, colIdx, true); colValue[colIdx] = val; } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setBoolean(int colIdx, Boolean val) throws TypeMismatchException { checkType(BOOLEAN, colIdx, true); colValue[colIdx] = val; } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setBytes(int colIdx, byte[] val) throws TypeMismatchException { checkType(BYTEA, colIdx, true); colValue[colIdx] = val; } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setString(int colIdx, String val) throws TypeMismatchException { checkType(TEXT, colIdx, true); if (val != null) { colValue[colIdx] = val + "\0"; } else { colValue[colIdx] = val; } } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setFloat(int colIdx, Float val) throws TypeMismatchException { checkType(REAL, colIdx, true); colValue[colIdx] = val; } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setDouble(int colIdx, Double val) throws TypeMismatchException { checkType(FLOAT8, colIdx, true); colValue[colIdx] = val; } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setInt(int colIdx, Integer val) throws TypeMismatchException { checkType(INTEGER, colIdx, true); colValue[colIdx] = val; } /** * Sets the column value of the record. * * @param colIdx the column index * @param val the value * @throws TypeMismatchException the column type does not match */ public void setShort(int colIdx, Short val) throws TypeMismatchException { checkType(SMALLINT, colIdx, true); colValue[colIdx] = val; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public Long getLong(int colIdx) throws TypeMismatchException { checkType(BIGINT, colIdx, false); return (Long) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public Boolean getBoolean(int colIdx) throws TypeMismatchException { checkType(BOOLEAN, colIdx, false); return (Boolean) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public byte[] getBytes(int colIdx) throws TypeMismatchException { checkType(BYTEA, colIdx, false); return (byte[]) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public String getString(int colIdx) throws TypeMismatchException { checkType(TEXT, colIdx, false); return (String) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public Float getFloat(int colIdx) throws TypeMismatchException { checkType(REAL, colIdx, false); return (Float) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public Double getDouble(int colIdx) throws TypeMismatchException { checkType(FLOAT8, colIdx, false); return (Double) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public Integer getInt(int colIdx) throws TypeMismatchException { checkType(INTEGER, colIdx, false); return (Integer) colValue[colIdx]; } /** * Gets the column value of the record. * * @param colIdx the column index * @return column value * @throws TypeMismatchException the column type does not match */ public Short getShort(int colIdx) throws TypeMismatchException { checkType(SMALLINT, colIdx, false); return (Short) colValue[colIdx]; } /** * Sets the error field. * * @param errorVal the error value */ public void setError(boolean errorVal) { errorFlag = errorVal ? (byte) 1 : (byte) 0; } /** * Returns a string representation of the object. */ @Override public String toString() { if (colType == null) { return null; } StringBuilder result = new StringBuilder(); for (int i = 0; i < colType.length; i++) { result.append("Column ").append(i).append(":"); if (colValue[i] != null) { result.append(colType[i] == BYTEA.getOID() ? byteArrayInString((byte[]) colValue[i]) : colValue[i]); } result.append("\n"); } return result.toString(); } /** * Helper printing function */ private static String byteArrayInString(byte[] data) { StringBuilder result = new StringBuilder(); for (Byte b : data) { result.append(b.intValue()).append(" "); } return result.toString(); } /** * Private Helper to check the type mismatch * If the expected type is stored as string, then it must be set * via setString. * Otherwise, the type must match. */ private void checkType(DataType inTyp, int idx, boolean isSet) throws TypeMismatchException { if (idx < 0 || idx >= colType.length) { throw new TypeMismatchException("Column index is out of range"); } int exTyp = colType[idx]; if (isTextForm(exTyp)) { if (inTyp != TEXT) { throw new TypeMismatchException(formErrorMsg(inTyp.getOID(), TEXT.getOID(), isSet)); } } else if (inTyp != DataType.get(exTyp)) { throw new TypeMismatchException(formErrorMsg(inTyp.getOID(), exTyp, isSet)); } } private String formErrorMsg(int inTyp, int colTyp, boolean isSet) { return isSet ? "Cannot set " + getTypeName(inTyp) + " to a " + getTypeName(colTyp) + " column" : "Cannot get " + getTypeName(inTyp) + " from a " + getTypeName(colTyp) + " column"; } /** * Private Helper routine to tell whether a type is Text form or not * * @param type the type OID that we want to check */ private boolean isTextForm(int type) { return !Arrays.asList(BIGINT, BOOLEAN, BYTEA, FLOAT8, INTEGER, REAL, SMALLINT).contains(DataType.get(type)); } /** * Helper to get the type name. * If a given oid is not in the commonly used list, we * would expect a TEXT for it (for the error message). * * @param oid type OID * @return type name */ public static String getTypeName(int oid) { switch (DataType.get(oid)) { case BOOLEAN: return "BOOLEAN"; case BYTEA: return "BYTEA"; case CHAR: return "CHAR"; case BIGINT: return "BIGINT"; case SMALLINT: return "SMALLINT"; case INTEGER: return "INTEGER"; case TEXT: return "TEXT"; case REAL: return "REAL"; case FLOAT8: return "FLOAT8"; case BPCHAR: return "BPCHAR"; case VARCHAR: return "VARCHAR"; case DATE: return "DATE"; case TIME: return "TIME"; case TIMESTAMP: return "TIMESTAMP"; case NUMERIC: return "NUMERIC"; default: return "TEXT"; } } /* * Get alignment from command line to match to the alignment * the C code uses (see gphdfs/src/protocol_formatter/common.c). */ private void initializeEightByteAlignment() { String alignment = System.getProperty("greenplum.alignment"); if (alignment == null) { return; } alignmentOfEightBytes = Integer.parseInt(alignment); } /** * Returns if the writable object is empty, * based on the pkt len as read from stream. * -1 means nothing was read (eof). * * @return whether the writable object is empty */ public boolean isEmpty() { return pktlen == EOF; } }