Java tutorial
/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.moz.fiji.schema.util; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.avro.Schema; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificRecord; import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonParser.Feature; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.node.ObjectNode; import com.moz.fiji.annotations.ApiAudience; /** * Decode a JSON string into an Avro record. */ @ApiAudience.Private public final class FromJson { /** Utility class cannot be instantiated. */ private FromJson() { } /** * Decodes a JSON node as an Avro value. * * Comply with specified default values when decoding records with missing fields. * * @param json JSON node to decode. * @param schema Avro schema of the value to decode. * @return the decoded value. * @throws IOException on error. */ public static Object fromJsonNode(JsonNode json, Schema schema) throws IOException { switch (schema.getType()) { case INT: { if (!json.isInt()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } return json.getIntValue(); } case LONG: { if (!json.isLong() && !json.isInt()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } return json.getLongValue(); } case FLOAT: { if (json.isDouble() || json.isInt() || json.isLong()) { return (float) json.getDoubleValue(); } throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } case DOUBLE: { if (json.isDouble() || json.isInt() || json.isLong()) { return json.getDoubleValue(); } throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } case STRING: { if (!json.isTextual()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } return json.getTextValue(); } case BOOLEAN: { if (!json.isBoolean()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } return json.getBooleanValue(); } case ARRAY: { if (!json.isArray()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } final List<Object> list = Lists.newArrayList(); final Iterator<JsonNode> it = json.getElements(); while (it.hasNext()) { final JsonNode element = it.next(); list.add(fromJsonNode(element, schema.getElementType())); } return list; } case MAP: { if (!json.isObject()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } assert json instanceof ObjectNode; // Help findbugs out. final Map<String, Object> map = Maps.newHashMap(); final Iterator<Map.Entry<String, JsonNode>> it = ((ObjectNode) json).getFields(); while (it.hasNext()) { final Map.Entry<String, JsonNode> entry = it.next(); map.put(entry.getKey(), fromJsonNode(entry.getValue(), schema.getValueType())); } return map; } case RECORD: { if (!json.isObject()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } final Set<String> fields = Sets.newHashSet(json.getFieldNames()); final SpecificRecord record = newSpecificRecord(schema.getFullName()); for (Schema.Field field : schema.getFields()) { final String fieldName = field.name(); final JsonNode fieldElement = json.get(fieldName); if (fieldElement != null) { final Object fieldValue = fromJsonNode(fieldElement, field.schema()); record.put(field.pos(), fieldValue); } else if (field.defaultValue() != null) { record.put(field.pos(), fromJsonNode(field.defaultValue(), field.schema())); } else { throw new IOException(String.format("Error parsing Avro record '%s' with missing field '%s'.", schema.getFullName(), field.name())); } fields.remove(fieldName); } if (!fields.isEmpty()) { throw new IOException(String.format("Error parsing Avro record '%s' with unexpected fields: %s.", schema.getFullName(), Joiner.on(",").join(fields))); } return record; } case UNION: return fromUnionJsonNode(json, schema); case NULL: { if (!json.isNull()) { throw new IOException( String.format("Avro schema specifies '%s' but got JSON value: '%s'.", schema, json)); } return null; } case BYTES: case FIXED: { if (!json.isTextual()) { throw new IOException(String .format("Avro schema specifies '%s' but got non-string JSON value: '%s'.", schema, json)); } // TODO: parse string into byte array. throw new RuntimeException("Parsing byte arrays is not implemented yet"); } case ENUM: { if (!json.isTextual()) { throw new IOException(String.format( "Avro schema specifies enum '%s' but got non-string JSON value: '%s'.", schema, json)); } final String enumValStr = json.getTextValue(); return enumValue(schema.getFullName(), enumValStr); } default: throw new RuntimeException("Unexpected schema type: " + schema); } } /** * Decodes a union from a JSON node. * * @param json JSON node to decode. * @param schema Avro schema of the union value to decode. * @return the decoded value. * @throws IOException on error. */ private static Object fromUnionJsonNode(JsonNode json, Schema schema) throws IOException { Preconditions.checkArgument(schema.getType() == Schema.Type.UNION); try { final Schema optionalType = AvroUtils.getOptionalType(schema); if (optionalType != null) { return json.isNull() ? null : fromJsonNode(json, optionalType); } } catch (IOException ioe) { // Union value may be wrapped, ignore. } /** Map from Avro schema type to list of schemas of this type in the union. */ final Map<Schema.Type, List<Schema>> typeMap = Maps.newEnumMap(Schema.Type.class); for (Schema type : schema.getTypes()) { List<Schema> types = typeMap.get(type.getType()); if (null == types) { types = Lists.newArrayList(); typeMap.put(type.getType(), types); } types.add(type); } if (json.isObject() && (json.size() == 1)) { final Map.Entry<String, JsonNode> entry = json.getFields().next(); final String typeName = entry.getKey(); final JsonNode actualNode = entry.getValue(); for (Schema type : schema.getTypes()) { if (type.getFullName().equals(typeName)) { return fromJsonNode(actualNode, type); } } } for (Schema type : schema.getTypes()) { try { return fromJsonNode(json, type); } catch (IOException ioe) { // Wrong union type case. } } throw new IOException(String.format("Unable to decode JSON '%s' for union '%s'.", json, schema)); } /** * Decodes a JSON encoded record. * * @param json JSON tree to decode, encoded as a string. * @param schema Avro schema of the value to decode. * @return the decoded value. * @throws IOException on error. */ public static Object fromJsonString(String json, Schema schema) throws IOException { final ObjectMapper mapper = new ObjectMapper(); final JsonParser parser = new JsonFactory().createJsonParser(json).enable(Feature.ALLOW_COMMENTS) .enable(Feature.ALLOW_SINGLE_QUOTES).enable(Feature.ALLOW_UNQUOTED_FIELD_NAMES); final JsonNode root = mapper.readTree(parser); return fromJsonNode(root, schema); } /** * Instantiates a specific record by name. * * @param fullName Fully qualified record name to instantiate. * @return a brand-new specific record instance of the given class. * @throws IOException on error. */ private static SpecificRecord newSpecificRecord(String fullName) throws IOException { try { @SuppressWarnings("rawtypes") final Class klass = Class.forName(fullName); return (SpecificRecord) klass.newInstance(); } catch (ClassNotFoundException cnfe) { throw new IOException(String.format("Error while deserializing JSON: '%s' class not found.", fullName)); } catch (IllegalAccessException iae) { throw new IOException(String.format("Error while deserializing JSON: cannot access '%s'.", fullName)); } catch (InstantiationException ie) { throw new IOException( String.format("Error while deserializing JSON: cannot instantiate '%s'.", fullName)); } } /** * Looks up an Avro enum by name and string value. * * @param fullName Fully qualified enum name to look-up. * @param value Enum value as a string. * @return the Java enum value. * @throws IOException on error. */ @SuppressWarnings("unchecked") private static Object enumValue(String fullName, String value) throws IOException { try { @SuppressWarnings("rawtypes") final Class enumClass = Class.forName(fullName); return Enum.valueOf(enumClass, value); } catch (ClassNotFoundException cnfe) { throw new IOException( String.format("Error while deserializing JSON: '%s' enum class not found.", fullName)); } } /** * Standard Avro JSON decoder. * * @param json JSON string to decode. * @param schema Schema of the value to decode. * @return the decoded value. * @throws IOException on error. */ @SuppressWarnings({ "rawtypes", "unchecked" }) public static Object fromAvroJsonString(String json, Schema schema) throws IOException { final InputStream jsonInput = new ByteArrayInputStream(json.getBytes("UTF-8")); final Decoder decoder = DecoderFactory.get().jsonDecoder(schema, jsonInput); final SpecificDatumReader reader = new SpecificDatumReader(schema); return reader.read(null, decoder); } }