co.cask.cdap.format.StructuredRecordStringConverter.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.format.StructuredRecordStringConverter.java

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.format;

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
import com.google.gson.stream.JsonWriter;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Utility class for converting {@link StructuredRecord} to and from json and delimited strings.
 *
 * <p>JSON representation used by this class: booleans, numbers and strings map to the matching
 * JSON primitives; bytes are written as a JSON array of numbers; arrays map to JSON arrays; maps
 * and records map to JSON objects; enums are written by name and read back as plain strings.
 */
public final class StructuredRecordStringConverter {

    // Known Java type to schema type mapping, looked up by the exact runtime class of a value.
    // Doesn't have map, array, enum, ByteBuffer subclasses and record subclasses as those need
    // to use instanceof to check (see getSchemaType).
    private static final Map<Class<?>, Schema.Type> TYPE_TO_SCHEMA = new IdentityHashMap<>(
            ImmutableMap.<Class<?>, Schema.Type>builder().put(Boolean.class, Schema.Type.BOOLEAN)
                    .put(Byte.class, Schema.Type.INT).put(Short.class, Schema.Type.INT)
                    .put(Integer.class, Schema.Type.INT).put(Long.class, Schema.Type.LONG)
                    .put(Float.class, Schema.Type.FLOAT).put(Double.class, Schema.Type.DOUBLE)
                    .put(String.class, Schema.Type.STRING).put(ByteBuffer.class, Schema.Type.BYTES)
                    .put(byte[].class, Schema.Type.BYTES).put(StructuredRecord.class, Schema.Type.RECORD).build());

    // JSON token that starts a value of each schema type; used to pick a branch when reading a union.
    // ENUM maps to STRING because enums are read as plain strings (see readJson).
    private static final EnumMap<Schema.Type, JsonToken> SCHEMA_TO_JSON_TYPE = new EnumMap<>(
            ImmutableMap.<Schema.Type, JsonToken>builder().put(Schema.Type.NULL, JsonToken.NULL)
                    .put(Schema.Type.BOOLEAN, JsonToken.BOOLEAN).put(Schema.Type.INT, JsonToken.NUMBER)
                    .put(Schema.Type.LONG, JsonToken.NUMBER).put(Schema.Type.FLOAT, JsonToken.NUMBER)
                    .put(Schema.Type.DOUBLE, JsonToken.NUMBER).put(Schema.Type.STRING, JsonToken.STRING)
                    .put(Schema.Type.ENUM, JsonToken.STRING)
                    .put(Schema.Type.BYTES, JsonToken.BEGIN_ARRAY).put(Schema.Type.ARRAY, JsonToken.BEGIN_ARRAY)
                    .put(Schema.Type.MAP, JsonToken.BEGIN_OBJECT).put(Schema.Type.RECORD, JsonToken.BEGIN_OBJECT)
                    .build());

    /**
     * Converts a {@link StructuredRecord} to a json string.
     *
     * @param record the record to serialize; its own schema drives the output
     * @return the json representation of the record
     * @throws IOException if a value does not match the type its schema declares
     */
    public static String toJsonString(StructuredRecord record) throws IOException {
        StringWriter strWriter = new StringWriter();
        // try-with-resources keeps a failure inside writeJson as the primary exception; closing a
        // half-written JsonWriter in a finally block would replace it with "Incomplete document".
        try (JsonWriter writer = new JsonWriter(strWriter)) {
            writeJson(writer, record.getSchema(), record);
        }
        return strWriter.toString();
    }

    /**
     * Converts a json string to a {@link StructuredRecord} based on the schema.
     *
     * @param json the json text to parse
     * @param schema the schema describing the json; the result is cast to {@link StructuredRecord}
     * @return the record read from the json
     * @throws IOException if the json cannot be read according to the schema
     */
    public static StructuredRecord fromJsonString(String json, Schema schema) throws IOException {
        try (JsonReader reader = new JsonReader(new StringReader(json))) {
            return (StructuredRecord) readJson(reader, schema);
        }
    }

    /**
     * Converts a {@link StructuredRecord} to a delimited string.
     *
     * <p>Fields are emitted in schema order using each value's {@code toString()}. A null value is
     * emitted as an empty string, which {@link #fromDelimitedString} reads back as "no value".
     * Values are not escaped, so a value containing the delimiter will not round-trip.
     *
     * @param record the record to convert
     * @param delimiter the separator placed between field values
     * @return the delimited representation of the record
     */
    public static String toDelimitedString(final StructuredRecord record, String delimiter) {
        return Joiner.on(delimiter)
                .join(Iterables.transform(record.getSchema().getFields(), new Function<Schema.Field, String>() {
                    @Override
                    public String apply(Schema.Field field) {
                        Object fieldValue = record.get(field.getName());
                        // A null field used to blow up with a NullPointerException here.
                        return fieldValue == null ? "" : fieldValue.toString();
                    }
                }));
    }

    /**
     * Converts a delimited string to a {@link StructuredRecord} based on the schema.
     *
     * <p>Parts are matched to the schema fields by position. An empty part leaves the field at that
     * position unset. Empty parts beyond the last field (for example from a trailing delimiter) are
     * ignored; a non-empty part beyond the last field fails with
     * {@link java.util.NoSuchElementException}.
     *
     * @param delimitedString the text to parse
     * @param delimiter the separator between field values
     * @param schema the schema whose fields receive the values, in order
     * @return the record built from the parsed values
     */
    public static StructuredRecord fromDelimitedString(String delimitedString, String delimiter, Schema schema) {
        StructuredRecord.Builder builder = StructuredRecord.builder(schema);
        Iterator<Schema.Field> fields = schema.getFields().iterator();

        for (String part : Splitter.on(delimiter).split(delimitedString)) {
            if (part.isEmpty()) {
                // Still consume the field for this position; otherwise every later value would be
                // assigned to the wrong field.
                if (fields.hasNext()) {
                    fields.next();
                }
                continue;
            }
            builder.convertAndSet(fields.next().getName(), part);
        }

        return builder.build();
    }

    /**
     * Reads the next json value from the reader as the type described by the schema.
     *
     * @return the value read; null for a NULL schema, a String for an ENUM schema
     * @throws IOException if the schema type is not supported or the json cannot be read
     */
    private static Object readJson(JsonReader reader, Schema schema) throws IOException {
        switch (schema.getType()) {
        case NULL:
            reader.nextNull();
            return null;
        case BOOLEAN:
            return reader.nextBoolean();
        case INT:
            return reader.nextInt();
        case LONG:
            return reader.nextLong();
        case FLOAT:
            // Force down cast
            return (float) reader.nextDouble();
        case DOUBLE:
            return reader.nextDouble();
        case BYTES:
            return readBytes(reader);
        case STRING:
            return reader.nextString();
        case ENUM:
            // Currently there is no standard container to represent enum type
            return reader.nextString();
        case ARRAY:
            return readArray(reader, schema.getComponentSchema());
        case MAP:
            return readMap(reader, schema.getMapSchema());
        case RECORD:
            return readRecord(reader, schema);
        case UNION:
            return readUnion(reader, schema);
        }

        throw new IOException("Unsupported schema: " + schema);
    }

    /**
     * Reads a json array of numbers as a byte array, keeping the low eight bits of each number.
     */
    private static byte[] readBytes(JsonReader reader) throws IOException {
        ByteArrayOutputStream os = new ByteArrayOutputStream(128);
        reader.beginArray();
        while (reader.peek() != JsonToken.END_ARRAY) {
            os.write(reader.nextInt());
        }
        reader.endArray();
        return os.toByteArray();
    }

    /**
     * Reads a json array, reading every element with the given element schema.
     */
    private static List<Object> readArray(JsonReader reader, Schema elementSchema) throws IOException {
        List<Object> result = new ArrayList<>();
        reader.beginArray();
        while (reader.peek() != JsonToken.END_ARRAY) {
            result.add(readJson(reader, elementSchema));
        }
        reader.endArray();
        return result;
    }

    /**
     * Reads a json object as a map. Json names are converted to the key type via
     * {@link #convertKey}; values are read with the value schema.
     *
     * @param mapSchema the key schema and value schema of the map
     * @throws IOException if the key schema is not compatible with string
     */
    private static Map<Object, Object> readMap(JsonReader reader, Map.Entry<Schema, Schema> mapSchema)
            throws IOException {
        Schema keySchema = mapSchema.getKey();
        if (!keySchema.isCompatible(Schema.of(Schema.Type.STRING))) {
            throw new IOException("Complex key type not supported: " + keySchema);
        }

        Schema valueSchema = mapSchema.getValue();
        Map<Object, Object> result = new HashMap<>();

        reader.beginObject();
        while (reader.peek() != JsonToken.END_OBJECT) {
            Object key = convertKey(reader.nextName(), keySchema.getType());
            result.put(key, readJson(reader, valueSchema));
        }
        reader.endObject();

        return result;
    }

    /**
     * Converts a json object name to a map key of the given simple type.
     *
     * @throws IOException if the type is not one of the supported key types
     */
    private static Object convertKey(String key, Schema.Type type) throws IOException {
        switch (type) {
        case STRING:
            return key;
        case BOOLEAN:
            return Boolean.valueOf(key);
        case INT:
            return Integer.valueOf(key);
        case LONG:
            return Long.valueOf(key);
        case FLOAT:
            return Float.valueOf(key);
        case DOUBLE:
            return Double.valueOf(key);
        }
        throw new IOException("Unable to convert string to type " + type);
    }

    /**
     * Reads a json object as a record of the given schema. Names that are not fields of the schema
     * are skipped.
     */
    private static StructuredRecord readRecord(JsonReader reader, Schema schema) throws IOException {
        StructuredRecord.Builder builder = StructuredRecord.builder(schema);

        reader.beginObject();
        while (reader.peek() != JsonToken.END_OBJECT) {
            Schema.Field field = schema.getField(reader.nextName());
            if (field == null) {
                // Ignore unrecognized fields
                reader.skipValue();
                continue;
            }

            builder.set(field.getName(), readJson(reader, field.getSchema()));
        }
        reader.endObject();

        return builder.build();
    }

    /**
     * Reads a value of a union schema. The branch is chosen as the first union member whose json
     * start token matches the next token in the reader.
     *
     * @throws IOException if no union member matches the next token
     */
    private static Object readUnion(JsonReader reader, Schema unionSchema) throws IOException {
        JsonToken token = reader.peek();
        // Based on the token to guess the schema
        for (Schema schema : unionSchema.getUnionSchemas()) {
            if (SCHEMA_TO_JSON_TYPE.get(schema.getType()) == token) {
                return readJson(reader, schema);
            }
        }

        throw new IOException("No matching schema found for union type: " + unionSchema);
    }

    /**
     * Writes a value as json according to the schema.
     *
     * @throws IOException if the schema type is not supported or the value has an unexpected type
     */
    private static void writeJson(JsonWriter writer, Schema schema, Object value) throws IOException {
        switch (schema.getType()) {
        case NULL:
            writer.nullValue();
            break;
        case BOOLEAN:
            writer.value((Boolean) value);
            break;
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
            writer.value((Number) value);
            break;
        case BYTES:
            writeBytes(writer, value);
            break;
        case STRING:
            writer.value((String) value);
            break;
        case ENUM:
            // readJson yields enum values as plain strings, so accept both forms to allow a
            // record that was read from json to be written back.
            writer.value(value instanceof Enum ? ((Enum<?>) value).name() : (String) value);
            break;
        case ARRAY:
            writeArray(writer, schema.getComponentSchema(), value);
            break;
        case MAP:
            writeMap(writer, schema.getMapSchema(), value);
            break;
        case RECORD:
            writeRecord(writer, schema, value);
            break;
        case UNION:
            writeJson(writer, findUnionSchema(schema, value), value);
            break;
        default:
            // Writing nothing would leave the document in a broken state; fail like readJson does.
            throw new IOException("Unsupported schema: " + schema);
        }
    }

    /**
     * Writes a bytes value, which must be either a {@link ByteBuffer} or a {@code byte[]}.
     */
    private static void writeBytes(JsonWriter writer, Object value) throws IOException {
        if (value instanceof ByteBuffer) {
            writeBytes(writer, (ByteBuffer) value);
        } else if (value instanceof byte[]) {
            byte[] bytes = (byte[]) value;
            writeBytes(writer, bytes, 0, bytes.length);
        } else {
            throw new IOException("Expects either ByteBuffer or byte[]. Got " + describeType(value));
        }
    }

    /**
     * Writes the remaining bytes of the buffer without changing its position, limit or mark.
     */
    private static void writeBytes(JsonWriter writer, ByteBuffer buffer) throws IOException {
        if (buffer.hasArray()) {
            writeBytes(writer, buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        } else {
            // Copy through a duplicate: it has its own position and mark, so the caller's buffer
            // state (including a mark the caller may have set) is left untouched.
            byte[] buf = new byte[buffer.remaining()];
            buffer.duplicate().get(buf);
            writeBytes(writer, buf, 0, buf.length);
        }
    }

    /**
     * Writes {@code len} bytes starting at {@code off} as a json array of numbers.
     */
    private static void writeBytes(JsonWriter writer, byte[] bytes, int off, int len) throws IOException {
        writer.beginArray();
        for (int i = off; i < off + len; i++) {
            writer.value(bytes[i]);
        }
        writer.endArray();
    }

    /**
     * Writes a {@link Collection} or a Java array (object or primitive) as a json array.
     */
    private static void writeArray(JsonWriter writer, Schema elementSchema, Object value) throws IOException {
        if (!(value instanceof Collection) && (value == null || !value.getClass().isArray())) {
            throw new IOException("Expects either Collection or array. Got: " + describeType(value));
        }

        writer.beginArray();
        if (value instanceof Collection) {
            for (Object element : (Collection<?>) value) {
                writeJson(writer, elementSchema, element);
            }
        } else {
            // Reflective access works for primitive arrays too; a cast to Object[] would fail on them.
            int length = Array.getLength(value);
            for (int i = 0; i < length; i++) {
                writeJson(writer, elementSchema, Array.get(value, i));
            }
        }
        writer.endArray();
    }

    /**
     * Writes a {@link Map} as a json object, using each key's {@code toString()} as the name.
     *
     * @throws IOException if the value is not a map or the key schema is not compatible with string
     */
    private static void writeMap(JsonWriter writer, Map.Entry<Schema, Schema> entrySchema, Object value)
            throws IOException {
        if (!(value instanceof Map)) {
            throw new IOException("Expects Map. Got: " + describeType(value));
        }

        Schema keySchema = entrySchema.getKey();
        if (!keySchema.isCompatible(Schema.of(Schema.Type.STRING))) {
            throw new IOException("Complex key type not supported: " + keySchema);
        }

        Schema valueSchema = entrySchema.getValue();

        writer.beginObject();
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            writer.name(entry.getKey().toString());
            writeJson(writer, valueSchema, entry.getValue());
        }
        writer.endObject();
    }

    /**
     * Writes a {@link StructuredRecord} as a json object. Fields whose value is null are omitted.
     */
    private static void writeRecord(JsonWriter writer, Schema schema, Object value) throws IOException {
        if (!(value instanceof StructuredRecord)) {
            throw new IOException("Expects StructuredRecord. Got: " + describeType(value));
        }

        StructuredRecord record = (StructuredRecord) value;
        writer.beginObject();
        for (Schema.Field field : schema.getFields()) {
            Object fieldValue = record.get(field.getName());
            if (fieldValue != null) {
                writer.name(field.getName());
                writeJson(writer, field.getSchema(), fieldValue);
            }
        }
        writer.endObject();
    }

    /**
     * Picks the member of a union schema to use for writing the given value.
     *
     * @throws IOException if no union member matches the type of the value
     */
    private static Schema findUnionSchema(Schema unionSchema, @Nullable Object value) throws IOException {
        Schema.Type type = getSchemaType(value);

        for (Schema schema : unionSchema.getUnionSchemas()) {
            // Just match the type, not matching the detail schema as it'd be too expensive.
            if (schema.getType() == type) {
                return schema;
            }
        }

        // Enum values read from json are plain strings, so let a string fall back to an enum member.
        if (type == Schema.Type.STRING) {
            for (Schema schema : unionSchema.getUnionSchemas()) {
                if (schema.getType() == Schema.Type.ENUM) {
                    return schema;
                }
            }
        }
        throw new IOException("Value type " + type + " not valid in union: " + unionSchema);
    }

    /**
     * Determines the schema type that corresponds to the runtime type of a value.
     *
     * @throws IOException if the value's type has no corresponding schema type
     */
    private static Schema.Type getSchemaType(@Nullable Object value) throws IOException {
        if (value == null) {
            return Schema.Type.NULL;
        }

        Class<?> cls = value.getClass();
        Schema.Type type = TYPE_TO_SCHEMA.get(cls);
        if (type != null) {
            return type;
        }

        // The exact-class lookup above cannot match subclasses. ByteBuffer is abstract, so its
        // instances always have a subclass as runtime type and must be detected here.
        if (value instanceof ByteBuffer) {
            return Schema.Type.BYTES;
        }

        if (value instanceof StructuredRecord) {
            return Schema.Type.RECORD;
        }

        if (value instanceof Enum) {
            return Schema.Type.ENUM;
        }

        if (value instanceof Collection || cls.isArray()) {
            return Schema.Type.ARRAY;
        }

        if (value instanceof Map) {
            return Schema.Type.MAP;
        }

        throw new IOException("Unsupported type found in StructuredRecord: " + cls);
    }

    /**
     * Describes the runtime type of a value for error messages without failing on null.
     */
    private static String describeType(@Nullable Object value) {
        return value == null ? "null" : String.valueOf(value.getClass());
    }

    private StructuredRecordStringConverter() {
        //inaccessible constructor for static class
    }
}