SqoopHCatImportHelper.java (org.apache.sqoop.mapreduce.hcat): helper class for Sqoop HCatalog import jobs
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.hcat;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.MathContext;
import java.sql.Date;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.IntWritable;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
import org.apache.hive.hcatalog.mapreduce.StorerInfo;
import org.apache.sqoop.lib.SqoopRecord;
import org.apache.sqoop.mapreduce.ImportJobBase;

import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.ClobRef;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;
import com.cloudera.sqoop.lib.LargeObjectLoader;

/**
 * Helper class for Sqoop HCat Integration import jobs.
 */
public class SqoopHCatImportHelper {

  public static final Log LOG =
    LogFactory.getLog(SqoopHCatImportHelper.class.getName());

  private static boolean debugHCatImportMapper = false;

  private InputJobInfo jobInfo;
  private HCatSchema hCatFullTableSchema;
  private int fieldCount;
  private boolean bigDecimalFormatString;
  private LargeObjectLoader lobLoader;
  private HCatSchema partitionSchema = null;
  private HCatSchema dataColsSchema = null;
  private String hiveDelimsReplacement;
  private boolean doHiveDelimsReplacement = false;
  private DelimiterSet hiveDelimiters;
  private String[] staticPartitionKeys;
  private int[] hCatFieldPositions;
  // Initialized to -1 so convertToHCatRecord() can detect the first record.
  private int colCount = -1;

  public SqoopHCatImportHelper(Configuration conf)
      throws IOException, InterruptedException {
    String inputJobInfoStr = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
    jobInfo = (InputJobInfo) HCatUtil.deserialize(inputJobInfoStr);
    dataColsSchema = jobInfo.getTableInfo().getDataColumns();
    partitionSchema = jobInfo.getTableInfo().getPartitionColumns();
    StringBuilder storerInfoStr = new StringBuilder(1024);
    StorerInfo storerInfo = jobInfo.getTableInfo().getStorerInfo();
    storerInfoStr.append("HCatalog Storer Info : ")
      .append("\n\tHandler = ").append(storerInfo.getStorageHandlerClass())
      .append("\n\tInput format class = ").append(storerInfo.getIfClass())
      .append("\n\tOutput format class = ").append(storerInfo.getOfClass())
      .append("\n\tSerde class = ").append(storerInfo.getSerdeClass());
    Properties storerProperties = storerInfo.getProperties();
    if (!storerProperties.isEmpty()) {
      storerInfoStr.append("\nStorer properties ");
      for (Map.Entry<Object, Object> entry : storerProperties.entrySet()) {
        String key = (String) entry.getKey();
        Object val = entry.getValue();
        storerInfoStr.append("\n\t").append(key).append('=').append(val);
      }
    }
    storerInfoStr.append("\n");
    LOG.info(storerInfoStr);

    hCatFullTableSchema = new HCatSchema(dataColsSchema.getFields());
    for (HCatFieldSchema hfs : partitionSchema.getFields()) {
      hCatFullTableSchema.append(hfs);
    }
    fieldCount = hCatFullTableSchema.size();
    lobLoader = new LargeObjectLoader(conf,
      new Path(jobInfo.getTableInfo().getTableLocation()));
    bigDecimalFormatString = conf.getBoolean(
      ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
      ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
    debugHCatImportMapper = conf.getBoolean(
      SqoopHCatUtilities.DEBUG_HCAT_IMPORT_MAPPER_PROP, false);
    IntWritable[] delimChars = DefaultStringifier.loadArray(conf,
      SqoopHCatUtilities.HIVE_DELIMITERS_TO_REPLACE_PROP, IntWritable.class);
    hiveDelimiters = new DelimiterSet((char) delimChars[0].get(),
      (char) delimChars[1].get(), (char) delimChars[2].get(),
      (char) delimChars[3].get(), delimChars[4].get() == 1);
    hiveDelimsReplacement = conf.get(
      SqoopHCatUtilities.HIVE_DELIMITERS_REPLACEMENT_PROP);
    if (hiveDelimsReplacement == null) {
      hiveDelimsReplacement = "";
    }
    doHiveDelimsReplacement = Boolean.valueOf(
      conf.get(SqoopHCatUtilities.HIVE_DELIMITERS_REPLACEMENT_ENABLED_PROP));
    IntWritable[] fPos = DefaultStringifier.loadArray(conf,
      SqoopHCatUtilities.HCAT_FIELD_POSITIONS_PROP, IntWritable.class);
    hCatFieldPositions = new int[fPos.length];
    for (int i = 0; i < fPos.length; ++i) {
      hCatFieldPositions[i] = fPos[i].get();
    }
    LOG.debug("Hive delims replacement enabled : " + doHiveDelimsReplacement);
    LOG.debug("Hive Delimiters : " + hiveDelimiters.toString());
    LOG.debug("Hive delimiters replacement : " + hiveDelimsReplacement);
    staticPartitionKeys = conf.getStrings(
      SqoopHCatUtilities.HCAT_STATIC_PARTITION_KEY_PROP);
    String partKeysString = staticPartitionKeys == null ? ""
      : Arrays.toString(staticPartitionKeys);
    LOG.debug("Static partition key used : " + partKeysString);
  }

  public HCatRecord convertToHCatRecord(SqoopRecord sqr)
      throws IOException, InterruptedException {
    try {
      // Loading of LOBs was delayed until we have a Context.
      sqr.loadLargeObjects(lobLoader);
    } catch (SQLException sqlE) {
      throw new IOException(sqlE);
    }
    if (colCount == -1) {
      colCount = sqr.getFieldMap().size();
    }
    Map<String, Object> fieldMap = sqr.getFieldMap();
    HCatRecord result = new DefaultHCatRecord(fieldCount);
    for (Map.Entry<String, Object> entry : fieldMap.entrySet()) {
      String key = entry.getKey();
      Object val = entry.getValue();
      String hfn = key.toLowerCase();
      // Static partition key columns are not stored in the record itself,
      // so skip them here.
      boolean skip = false;
      if (staticPartitionKeys != null && staticPartitionKeys.length > 0) {
        for (int i = 0; i < staticPartitionKeys.length; ++i) {
          if (staticPartitionKeys[i].equals(hfn)) {
            skip = true;
            break;
          }
        }
      }
      if (skip) {
        continue;
      }
      HCatFieldSchema hfs = null;
      try {
        hfs = hCatFullTableSchema.get(hfn);
      } catch (Exception e) {
        throw new IOException("Unable to lookup " + hfn + " in the hcat schema");
      }
      if (debugHCatImportMapper) {
        LOG.debug("SqoopRecordVal: field = " + key + " Val " + val
          + " of type " + (val == null ? null : val.getClass().getName())
          + ", hcattype " + hfs.getTypeString());
      }
      Object hCatVal = toHCat(val, hfs);
      result.set(hfn, hCatFullTableSchema, hCatVal);
    }
    return result;
  }

  private Object toHCat(Object val, HCatFieldSchema hfs) {
    HCatFieldSchema.Type hfsType = hfs.getType();
    if (val == null) {
      return null;
    }
    Object retVal = null;
    if (val instanceof Number) {
      retVal = convertNumberTypes(val, hfs);
    } else if (val instanceof Boolean) {
      retVal = convertBooleanTypes(val, hfs);
    } else if (val instanceof String) {
      retVal = convertStringTypes(val, hfs);
    } else if (val instanceof java.util.Date) {
      retVal = converDateTypes(val, hfs);
    } else if (val instanceof BytesWritable) {
      if (hfsType == HCatFieldSchema.Type.BINARY) {
        BytesWritable bw = (BytesWritable) val;
        retVal = bw.getBytes();
      }
    } else if (val instanceof BlobRef) {
      if (hfsType == HCatFieldSchema.Type.BINARY) {
        BlobRef br = (BlobRef) val;
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        retVal = bytes;
      }
    } else if (val instanceof ClobRef) {
      retVal = convertClobType(val, hfs);
    } else {
      throw new UnsupportedOperationException("Objects of type "
        + val.getClass().getName() + " are not supported");
    }
    if (retVal == null) {
      LOG.error("Unable to convert [" + val + "] of type "
        + val.getClass().getName() + " to HCatalog type " + hfs.getTypeString());
    }
    return retVal;
  }

  private Object convertClobType(Object val, HCatFieldSchema hfs) {
    HCatFieldSchema.Type hfsType = hfs.getType();
    ClobRef cr = (ClobRef) val;
    String s = cr.isExternal() ? cr.toString() : cr.getData();
    if (hfsType == HCatFieldSchema.Type.STRING) {
      return s;
    } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
      VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
      HiveVarchar hvc = new HiveVarchar(s, vti.getLength());
      return hvc;
    } else if (hfsType == HCatFieldSchema.Type.CHAR) {
      CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
      HiveChar hc = new HiveChar(s, cti.getLength());
      return hc;
    }
    return null;
  }

  private Object converDateTypes(Object val, HCatFieldSchema hfs) {
    HCatFieldSchema.Type hfsType = hfs.getType();
    Date d;
    Time t;
    Timestamp ts;
    if (val instanceof java.sql.Date) {
      d = (Date) val;
      if (hfsType == HCatFieldSchema.Type.DATE) {
        return d;
      } else if (hfsType == HCatFieldSchema.Type.TIMESTAMP) {
        return new Timestamp(d.getTime());
      } else if (hfsType == HCatFieldSchema.Type.BIGINT) {
        return d.getTime();
      } else if (hfsType == HCatFieldSchema.Type.STRING) {
        return val.toString();
      } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
        VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
        HiveVarchar hvc = new HiveVarchar(val.toString(), vti.getLength());
        return hvc;
      } else if (hfsType == HCatFieldSchema.Type.CHAR) {
        CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
        HiveChar hChar = new HiveChar(val.toString(), cti.getLength());
        return hChar;
      }
    } else if (val instanceof java.sql.Time) {
      t = (Time) val;
      if (hfsType == HCatFieldSchema.Type.DATE) {
        return new Date(t.getTime());
      } else if (hfsType == HCatFieldSchema.Type.TIMESTAMP) {
        return new Timestamp(t.getTime());
      } else if (hfsType == HCatFieldSchema.Type.BIGINT) {
        return ((Time) val).getTime();
      } else if (hfsType == HCatFieldSchema.Type.STRING) {
        return val.toString();
      } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
        VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
        HiveVarchar hvc = new HiveVarchar(val.toString(), vti.getLength());
        return hvc;
      } else if (hfsType == HCatFieldSchema.Type.CHAR) {
        CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
        HiveChar hChar = new HiveChar(val.toString(), cti.getLength());
        return hChar;
      }
    } else if (val instanceof java.sql.Timestamp) {
      ts = (Timestamp) val;
      if (hfsType == HCatFieldSchema.Type.DATE) {
        return new Date(ts.getTime());
      } else if (hfsType == HCatFieldSchema.Type.TIMESTAMP) {
        return ts;
      } else if (hfsType == HCatFieldSchema.Type.BIGINT) {
        return ts.getTime();
      } else if (hfsType == HCatFieldSchema.Type.STRING) {
        return val.toString();
      } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
        VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
        HiveVarchar hvc = new HiveVarchar(val.toString(), vti.getLength());
        return hvc;
      } else if (hfsType == HCatFieldSchema.Type.CHAR) {
        CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
        HiveChar hc = new HiveChar(val.toString(), cti.getLength());
        return hc;
      }
    }
    return null;
  }

  private Object convertStringTypes(Object val, HCatFieldSchema hfs) {
    HCatFieldSchema.Type hfsType = hfs.getType();
    if (hfsType == HCatFieldSchema.Type.STRING
        || hfsType == HCatFieldSchema.Type.VARCHAR
        || hfsType == HCatFieldSchema.Type.CHAR) {
      String str = val.toString();
      if (doHiveDelimsReplacement) {
        str = FieldFormatter.hiveStringReplaceDelims(str,
          hiveDelimsReplacement, hiveDelimiters);
      }
      if (hfsType == HCatFieldSchema.Type.STRING) {
        return str;
      } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
        VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
        HiveVarchar hvc = new HiveVarchar(str, vti.getLength());
        return hvc;
      } else if (hfsType == HCatFieldSchema.Type.CHAR) {
        CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
        // Use the delimiter-replaced value for CHAR as well, matching the
        // STRING and VARCHAR branches.
        HiveChar hc = new HiveChar(str, cti.getLength());
        return hc;
      }
    } else if (hfsType == HCatFieldSchema.Type.DECIMAL) {
      BigDecimal bd = new BigDecimal(val.toString(), MathContext.DECIMAL128);
      HiveDecimal hd = HiveDecimal.create(bd);
      return hd;
    }
    return null;
  }

  private Object convertBooleanTypes(Object val, HCatFieldSchema hfs) {
    HCatFieldSchema.Type hfsType = hfs.getType();
    Boolean b = (Boolean) val;
    if (hfsType == HCatFieldSchema.Type.BOOLEAN) {
      return b;
    } else if (hfsType == HCatFieldSchema.Type.TINYINT) {
      return (byte) (b ? 1 : 0);
    } else if (hfsType == HCatFieldSchema.Type.SMALLINT) {
      return (short) (b ? 1 : 0);
    } else if (hfsType == HCatFieldSchema.Type.INT) {
      return (int) (b ? 1 : 0);
    } else if (hfsType == HCatFieldSchema.Type.BIGINT) {
      return (long) (b ? 1 : 0);
    } else if (hfsType == HCatFieldSchema.Type.FLOAT) {
      return (float) (b ? 1 : 0);
    } else if (hfsType == HCatFieldSchema.Type.DOUBLE) {
      return (double) (b ? 1 : 0);
    } else if (hfsType == HCatFieldSchema.Type.STRING) {
      return val.toString();
    } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
      VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
      HiveVarchar hvc = new HiveVarchar(val.toString(), vti.getLength());
      return hvc;
    } else if (hfsType == HCatFieldSchema.Type.CHAR) {
      CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
      HiveChar hChar = new HiveChar(val.toString(), cti.getLength());
      return hChar;
    }
    return null;
  }

  private Object convertNumberTypes(Object val, HCatFieldSchema hfs) {
    HCatFieldSchema.Type hfsType = hfs.getType();
    if (!(val instanceof Number)) {
      return null;
    }
    // BigDecimal values destined for string-like columns keep their exact
    // textual form; all other numbers fall through to the numeric handling below.
    if (val instanceof BigDecimal
        && (hfsType == HCatFieldSchema.Type.STRING
            || hfsType == HCatFieldSchema.Type.VARCHAR
            || hfsType == HCatFieldSchema.Type.CHAR)) {
      BigDecimal bd = (BigDecimal) val;
      String bdStr = null;
      if (bigDecimalFormatString) {
        bdStr = bd.toPlainString();
      } else {
        bdStr = bd.toString();
      }
      if (hfsType == HCatFieldSchema.Type.VARCHAR) {
        VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
        HiveVarchar hvc = new HiveVarchar(bdStr, vti.getLength());
        return hvc;
      } else if (hfsType == HCatFieldSchema.Type.CHAR) {
        CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
        HiveChar hChar = new HiveChar(bdStr, cti.getLength());
        return hChar;
      } else {
        return bdStr;
      }
    }
    Number n = (Number) val;
    if (hfsType == HCatFieldSchema.Type.TINYINT) {
      return n.byteValue();
    } else if (hfsType == HCatFieldSchema.Type.SMALLINT) {
      return n.shortValue();
    } else if (hfsType == HCatFieldSchema.Type.INT) {
      return n.intValue();
    } else if (hfsType == HCatFieldSchema.Type.BIGINT) {
      return n.longValue();
    } else if (hfsType == HCatFieldSchema.Type.FLOAT) {
      return n.floatValue();
    } else if (hfsType == HCatFieldSchema.Type.DOUBLE) {
      return n.doubleValue();
    } else if (hfsType == HCatFieldSchema.Type.BOOLEAN) {
      return n.byteValue() == 0 ? Boolean.FALSE : Boolean.TRUE;
    } else if (hfsType == HCatFieldSchema.Type.STRING) {
      return n.toString();
    } else if (hfsType == HCatFieldSchema.Type.VARCHAR) {
      VarcharTypeInfo vti = (VarcharTypeInfo) hfs.getTypeInfo();
      HiveVarchar hvc = new HiveVarchar(val.toString(), vti.getLength());
      return hvc;
    } else if (hfsType == HCatFieldSchema.Type.CHAR) {
      CharTypeInfo cti = (CharTypeInfo) hfs.getTypeInfo();
      HiveChar hChar = new HiveChar(val.toString(), cti.getLength());
      return hChar;
    } else if (hfsType == HCatFieldSchema.Type.DECIMAL) {
      BigDecimal bd = new BigDecimal(n.doubleValue(), MathContext.DECIMAL128);
      return HiveDecimal.create(bd);
    }
    return null;
  }

  public void cleanup() throws IOException {
    if (null != lobLoader) {
      lobLoader.close();
    }
  }
}
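For context, here is a minimal sketch of how this helper is typically driven from a MapReduce mapper during an HCatalog import. The mapper class name, the LongWritable/NullWritable key types, and the direct use of Mapper are illustrative assumptions rather than the actual Sqoop mapper; the sketch also presumes the job configuration has already been populated (HCatalog job info, Hive delimiter settings, field positions, static partition keys) before the task starts.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.sqoop.lib.SqoopRecord;
import org.apache.sqoop.mapreduce.hcat.SqoopHCatImportHelper;

// Illustrative mapper (not the real Sqoop class) showing the helper's lifecycle.
public class ExampleHCatImportMapper
    extends Mapper<LongWritable, SqoopRecord, NullWritable, HCatRecord> {

  private SqoopHCatImportHelper helper;

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    // The helper reads the serialized InputJobInfo, delimiter settings,
    // field positions and static partition keys from the job configuration.
    helper = new SqoopHCatImportHelper(context.getConfiguration());
  }

  @Override
  protected void map(LongWritable key, SqoopRecord value, Context context)
      throws IOException, InterruptedException {
    // Convert the imported database row into an HCatRecord laid out against
    // the full HCatalog schema (data columns followed by partition columns).
    HCatRecord hCatRecord = helper.convertToHCatRecord(value);
    context.write(NullWritable.get(), hCatRecord);
  }

  @Override
  protected void cleanup(Context context) throws IOException {
    // Closes the LargeObjectLoader held by the helper.
    helper.cleanup();
  }
}

Apart from the LargeObjectLoader it owns, the helper keeps only per-job state built in its constructor, so a single instance created in setup() and closed in cleanup() is sufficient for a task.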