Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.serde2.avro; import org.apache.avro.Schema; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.mapred.JobConf; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.math.BigInteger; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.nio.Buffer; import java.nio.ByteBuffer; import java.util.List; import java.util.Properties; /** * Utilities useful only to the AvroSerde itself. Not mean to be used by * end-users but public for interop to the ql package. */ public class AvroSerdeUtils { private static final Log LOG = LogFactory.getLog(AvroSerdeUtils.class); /** * Enum container for all avro table properties. * If introducing a new avro-specific table property, * add it here. Putting them in an enum rather than separate strings * allows them to be programmatically grouped and referenced together. */ public static enum AvroTableProperties { SCHEMA_LITERAL("avro.schema.literal"), SCHEMA_URL("avro.schema.url"), SCHEMA_NAMESPACE( "avro.schema.namespace"), SCHEMA_NAME("avro.schema.name"), SCHEMA_DOC( "avro.schema.doc"), AVRO_SERDE_SCHEMA( "avro.serde.schema"), SCHEMA_RETRIEVER("avro.schema.retriever"); private final String propName; AvroTableProperties(String propName) { this.propName = propName; } public String getPropName() { return this.propName; } } // Following parameters slated for removal, prefer usage of enum above, that allows programmatic access. @Deprecated public static final String SCHEMA_LITERAL = "avro.schema.literal"; @Deprecated public static final String SCHEMA_URL = "avro.schema.url"; @Deprecated public static final String SCHEMA_NAMESPACE = "avro.schema.namespace"; @Deprecated public static final String SCHEMA_NAME = "avro.schema.name"; @Deprecated public static final String SCHEMA_DOC = "avro.schema.doc"; @Deprecated public static final String AVRO_SERDE_SCHEMA = AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(); @Deprecated public static final String SCHEMA_RETRIEVER = AvroTableProperties.SCHEMA_RETRIEVER.getPropName(); public static final String SCHEMA_NONE = "none"; public static final String EXCEPTION_MESSAGE = "Neither " + AvroTableProperties.SCHEMA_LITERAL.getPropName() + " nor " + AvroTableProperties.SCHEMA_URL.getPropName() + " specified, can't determine table schema"; /** * Determine the schema to that's been provided for Avro serde work. * @param properties containing a key pointing to the schema, one way or another * @return schema to use while serdeing the avro file * @throws IOException if error while trying to read the schema from another location * @throws AvroSerdeException if unable to find a schema or pointer to it in the properties */ public static Schema determineSchemaOrThrowException(Configuration conf, Properties properties) throws IOException, AvroSerdeException { String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()); if (schemaString != null && !schemaString.equals(SCHEMA_NONE)) return AvroSerdeUtils.getSchemaFor(schemaString); // Try pulling directly from URL schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName()); if (schemaString == null || schemaString.equals(SCHEMA_NONE)) throw new AvroSerdeException(EXCEPTION_MESSAGE); try { Schema s = getSchemaFromFS(schemaString, conf); if (s == null) { //in case schema is not a file system return AvroSerdeUtils.getSchemaFor(new URL(schemaString).openStream()); } return s; } catch (IOException ioe) { throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe); } catch (URISyntaxException urie) { throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, urie); } } // Protected for testing and so we can pass in a conf for testing. protected static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf) throws IOException, URISyntaxException { FSDataInputStream in = null; FileSystem fs = null; try { fs = FileSystem.get(new URI(schemaFSUrl), conf); } catch (IOException ioe) { //return null only if the file system in schema is not recognized String msg = "Failed to open file system for uri " + schemaFSUrl + " assuming it is not a FileSystem url"; LOG.debug(msg, ioe); return null; } try { in = fs.open(new Path(schemaFSUrl)); Schema s = AvroSerdeUtils.getSchemaFor(in); return s; } finally { if (in != null) in.close(); } } /** * Determine if an Avro schema is of type Union[T, NULL]. Avro supports nullable * types via a union of type T and null. This is a very common use case. * As such, we want to silently convert it to just T and allow the value to be null. * * @return true if type represents Union[T, Null], false otherwise */ public static boolean isNullableType(Schema schema) { return schema.getType().equals(Schema.Type.UNION) && schema.getTypes().size() == 2 && (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) || schema.getTypes().get(1).getType().equals(Schema.Type.NULL)); // [null, null] not allowed, so this check is ok. } /** * In a nullable type, get the schema for the non-nullable type. This method * does no checking that the provides Schema is nullable. */ public static Schema getOtherTypeFromNullableType(Schema schema) { List<Schema> types = schema.getTypes(); return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0); } /** * Determine if we're being executed from within an MR job or as part * of a select * statement. The signals for this varies between Hive versions. * @param job that contains things that are or are not set in a job * @return Are we in a job or not? */ public static boolean insideMRJob(JobConf job) { return job != null && (HiveConf.getVar(job, HiveConf.ConfVars.PLAN) != null) && (!HiveConf.getVar(job, HiveConf.ConfVars.PLAN).isEmpty()); } public static Buffer getBufferFromBytes(byte[] input) { ByteBuffer bb = ByteBuffer.wrap(input); return bb.rewind(); } public static Buffer getBufferFromDecimal(HiveDecimal dec, int scale) { if (dec == null) { return null; } dec = dec.setScale(scale); return AvroSerdeUtils.getBufferFromBytes(dec.unscaledValue().toByteArray()); } public static byte[] getBytesFromByteBuffer(ByteBuffer byteBuffer) { byteBuffer.rewind(); byte[] result = new byte[byteBuffer.limit()]; byteBuffer.get(result); return result; } public static HiveDecimal getHiveDecimalFromByteBuffer(ByteBuffer byteBuffer, int scale) { byte[] result = getBytesFromByteBuffer(byteBuffer); HiveDecimal dec = HiveDecimal.create(new BigInteger(result), scale); return dec; } public static Schema getSchemaFor(String str) { Schema.Parser parser = new Schema.Parser(); Schema schema = parser.parse(str); return schema; } public static Schema getSchemaFor(File file) { Schema.Parser parser = new Schema.Parser(); Schema schema; try { schema = parser.parse(file); } catch (IOException e) { throw new RuntimeException("Failed to parse Avro schema from " + file.getName(), e); } return schema; } public static Schema getSchemaFor(InputStream stream) { Schema.Parser parser = new Schema.Parser(); Schema schema; try { schema = parser.parse(stream); } catch (IOException e) { throw new RuntimeException("Failed to parse Avro schema", e); } return schema; } }