com.moz.fiji.schema.FijiSchemaTable.java Source code

Java tutorial

Introduction

Here is the source code for com.moz.fiji.schema.FijiSchemaTable.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.schema;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;

import com.google.common.base.Objects;
import org.apache.avro.Schema;
import org.apache.avro.util.WeakIdentityHashMap;
import org.apache.commons.lang.builder.HashCodeBuilder;

import com.moz.fiji.annotations.ApiAudience;
import com.moz.fiji.annotations.ApiStability;
import com.moz.fiji.annotations.Inheritance;
import com.moz.fiji.schema.avro.SchemaTableBackup;
import com.moz.fiji.schema.util.BytesKey;
import com.moz.fiji.schema.util.Hasher;

/**
 * The Fiji schema table, which contains the lookup table between schema IDs, hashes, and full
 * schemas.
 *
 * @see FijiMetaTable
 * @see FijiSystemTable
 */
@ApiAudience.Framework
@ApiStability.Stable
@Inheritance.Sealed
public interface FijiSchemaTable extends Flushable, Closeable {
    /**
     * Looks up a schema ID given an Avro schema object.
     *
     * <p>If the schema is unknown, allocates a new ID and stores the new schema mapping.
     *
     * @param schema The full Avro schema to look up, and to store in the table if it is unknown.
     * @return The schema ID (either the existing one or the newly allocated one).
     * @throws IOException on I/O error.
     */
    long getOrCreateSchemaId(Schema schema) throws IOException;

    /**
     * Looks up a schema hash given an Avro schema object.
     *
     * <p>If the schema is unknown, allocates a new ID and stores the new schema mapping.
     *
     * @param schema Avro schema to look up, and to store in the table if it is unknown.
     * @return The schema hash.
     * @throws IOException on I/O error.
     */
    BytesKey getOrCreateSchemaHash(Schema schema) throws IOException;

    /**
     * Computes a schema hash.
     *
     * <p>Unlike the lookup methods of this interface, this method declares no checked exception.
     *
     * @param schema The Avro schema to hash.
     * @return The hash of the schema.
     */
    BytesKey getSchemaHash(Schema schema);

    /**
     * Looks up a schema given its ID.
     *
     * @param schemaId Schema ID to look up.
     * @return The Avro schema with the given ID, or {@code null} if the schema ID is unknown.
     * @throws IOException on I/O error.
     */
    Schema getSchema(long schemaId) throws IOException;

    /**
     * Looks up a schema given its hash.
     *
     * @param schemaHash Schema hash to look up.
     * @return The Avro schema with the given hash, or {@code null} if the schema hash is
     *     unknown.
     * @throws IOException on I/O error.
     */
    Schema getSchema(BytesKey schemaHash) throws IOException;

    /**
     * Looks up a schema entry from a given ID.
     *
     * @param schemaId Schema ID to look up.
     * @return The {@link SchemaEntry} matching the given ID, or {@code null} if the ID is
     *     unknown.
     * @throws IOException on I/O error.
     */
    SchemaEntry getSchemaEntry(long schemaId) throws IOException;

    /**
     * Looks up a schema entry from a given hash.
     *
     * @param schemaHash Schema hash to look up.
     * @return The {@link SchemaEntry} matching the given hash, or {@code null} if the hash is
     *     unknown.
     * @throws IOException on I/O error.
     */
    SchemaEntry getSchemaEntry(BytesKey schemaHash) throws IOException;

    /**
     * Looks up a schema entry from a given Avro schema definition.
     *
     * @param schema Schema definition to look up.
     * @return The {@link SchemaEntry} matching the given schema, or {@code null} if the schema
     *     is unknown.
     * @throws IOException on I/O error.
     */
    SchemaEntry getSchemaEntry(Schema schema) throws IOException;

    /**
     * Association between a schema, its ID and its hash.
     *
     * <p>All fields are final and assigned once by the constructor. The constructor performs no
     * null checks, so {@link #equals(Object)} is written to tolerate a null hash or schema,
     * consistently with {@link #hashCode()} and {@link #toString()}.
     */
    public static class SchemaEntry {
        /** Schema ID. */
        private final long mId;

        /** Schema hash. */
        private final BytesKey mHash;

        /** Avro schema object. */
        private final Schema mSchema;

        /**
         * Creates a new schema entry.
         *
         * @param id the schema ID
         * @param hash the schema hash
         * @param schema the Avro schema object
         */
        @ApiAudience.Private
        public SchemaEntry(long id, BytesKey hash, Schema schema) {
            this.mId = id;
            this.mHash = hash;
            this.mSchema = schema;
        }

        /** @return the schema ID */
        public long getId() {
            return this.mId;
        }

        /** @return the schema hash */
        public BytesKey getHash() {
            return this.mHash;
        }

        /** @return the Avro schema object */
        public Schema getSchema() {
            return this.mSchema;
        }

        /** {@inheritDoc} */
        @Override
        public String toString() {
            return Objects.toStringHelper(SchemaEntry.class).add("id", this.mId).add("hash", this.mHash)
                    .add("schema", this.mSchema).toString();
        }

        /**
         * Compares this entry with another object.
         *
         * <p>Two entries are equal when their IDs, hashes and schemas are all equal. A null hash
         * or schema is only equal to another null.
         *
         * @param other the object to compare with; may be null.
         * @return true if {@code other} is a SchemaEntry with the same ID, hash and schema.
         */
        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (!(other instanceof SchemaEntry)) {
                return false;
            }
            final SchemaEntry entry = (SchemaEntry) other;
            // Objects.equal() is null-safe: calling mHash.equals()/mSchema.equals() directly would
            // throw a NullPointerException for entries built with a null hash or schema, even
            // though hashCode() and toString() accept such entries.
            return (this.mId == entry.mId)
                    && Objects.equal(this.mHash, entry.mHash)
                    && Objects.equal(this.mSchema, entry.mSchema);
        }

        /** {@inheritDoc} */
        @Override
        public int hashCode() {
            return new HashCodeBuilder().append(mId).append(mHash).append(mSchema).toHashCode();
        }
    }

    /**
     * Cache mapping Avro schema objects to their schema hash.
     *
     * <p>Hashing a schema is expensive because the Avro schema object is serialized into JSON
     * first, so hashes are remembered per schema object.
     */
    @ApiAudience.Private
    static final class SchemaHashCache {
        /**
         * Backing store, a synchronized weak identity hash map:
         * <ul>
         *   <li> Keys are compared by object identity, since Schema.hashCode() and
         *        Schema.equals() implement a comparison that ignores doc fields or default
         *        values.
         *   <li> The map is weak to ensure the cache gets garbage collected properly.
         * </ul>
         */
        private final Map<Schema, BytesKey> mCache =
                Collections.synchronizedMap(new WeakIdentityHashMap<Schema, BytesKey>());

        /**
         * Computes the hash of an Avro schema.
         *
         * <p>Fiji currently uses MD5 sums (128 bits) of the schema JSON representation.
         *
         * @param schema Avro schema to compute a hash of.
         * @return Hash code as an array of bytes (16 bytes).
         */
        public static byte[] hashSchema(Schema schema) {
            final String json = schema.toString();
            return Hasher.hash(json);
        }

        /**
         * Returns the hash of an Avro schema, computing and caching it on first use.
         *
         * @param schema Avro schema to hash.
         * @return the schema hash.
         */
        public BytesKey getHash(Schema schema) {
            BytesKey cached = mCache.get(schema);
            if (cached == null) {
                // Cache miss: hash the schema and remember the result for this schema object.
                cached = new BytesKey(hashSchema(schema));
                mCache.put(schema, cached);
            }
            return cached;
        }
    }

    /**
     * Returns schema backup information in a form that can be directly written to a
     * MetadataBackup record. To read more about the Avro type that has been specified to store
     * this info, see Layout.avdl.
     *
     * @return The schema table entries, wrapped in a {@link SchemaTableBackup}.
     * @throws IOException on I/O error.
     */
    SchemaTableBackup toBackup() throws IOException;

    /**
     * Restores the schema entries from the specified backup record.
     *
     * @param backup The schema entries from a MetadataBackup record. This consists of the schema
     *     definition, schema ID, and schema hash.
     * @throws IOException on I/O error.
     */
    void fromBackup(SchemaTableBackup backup) throws IOException;
}