org.kiji.schema.impl.cassandra.RowDecoders.java Source code

Java tutorial

Introduction

Here is the source code for org.kiji.schema.impl.cassandra.RowDecoders.java

Source

/**
 * (c) Copyright 2014 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.schema.impl.cassandra;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import javax.annotation.concurrent.Immutable;
import javax.annotation.concurrent.NotThreadSafe;

import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterators;

import org.kiji.commons.ByteUtils;
import org.kiji.schema.DecodedCell;
import org.kiji.schema.EntityId;
import org.kiji.schema.InternalKijiError;
import org.kiji.schema.KConstants;
import org.kiji.schema.KijiCell;
import org.kiji.schema.KijiCellDecoder;
import org.kiji.schema.KijiColumnName;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiDataRequest.Column;
import org.kiji.schema.KijiIOException;
import org.kiji.schema.KijiRowKeyComponents;
import org.kiji.schema.KijiTable;
import org.kiji.schema.NoSuchColumnException;
import org.kiji.schema.avro.RowKeyComponent;
import org.kiji.schema.avro.RowKeyFormat2;
import org.kiji.schema.cassandra.CassandraColumnName;
import org.kiji.schema.cassandra.CassandraTableName;
import org.kiji.schema.layout.CassandraColumnNameTranslator;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.layout.impl.CellDecoderProvider;

/**
 * Provides decoding functions for Kiji columns.
 */
public final class RowDecoders {

    /**
     * Create a new column family result set decoder function.
     *
     * <p>
     *   The returned function filters the rows of the result set by the data request's minimum
     *   and maximum timestamps and by the column request's max versions, then decodes the
     *   remaining rows into cells. For group-type families, rows which the decoder maps to
     *   {@code null} are removed from the returned iterator.
     * </p>
     *
     * @param tableName The Cassandra table that the results are from.
     * @param column The Kiji column name of the family.
     * @param columnRequest The column request defining the request for the family.
     * @param dataRequest The data request defining the request.
     * @param layout The layout of the Kiji table.
     * @param translator A column name translator for the table.
     * @param decoderProvider A cell decoder provider for the table.
     * @param <T> Type of cell values.
     * @return A function to convert a {@link ResultSet} containing a column family to cells.
     */
    public static <T> Function<ResultSet, Iterator<KijiCell<T>>> getColumnFamilyDecoderFunction(
            final CassandraTableName tableName, final KijiColumnName column, final Column columnRequest,
            final KijiDataRequest dataRequest, final KijiTableLayout layout,
            final CassandraColumnNameTranslator translator, final CellDecoderProvider decoderProvider) {
        return new Function<ResultSet, Iterator<KijiCell<T>>>() {
            /**
             * {@inheritDoc}
             *
             * @throws IllegalStateException if the column name translator reports that the
             *     requested column does not exist; the original exception is attached as cause.
             */
            @Override
            public Iterator<KijiCell<T>> apply(final ResultSet resultSet) {
                final int maxVersions = columnRequest.getMaxVersions();
                final long minTimestamp = dataRequest.getMinTimestamp();
                final long maxTimestamp = dataRequest.getMaxTimestamp();

                Iterator<Row> rows = resultSet.iterator();

                // A timestamp filter is only installed when the corresponding bound differs from
                // 0 (minimum) or END_OF_TIME (maximum).
                if (minTimestamp != 0) {
                    rows = Iterators.filter(rows, new MinTimestampPredicate(minTimestamp));
                }
                if (maxTimestamp != KConstants.END_OF_TIME) {
                    rows = Iterators.filter(rows, new MaxTimestampPredicate(maxTimestamp));
                }
                // The max versions filter runs after the timestamp filters, so only rows inside
                // the requested time range count towards the limit.
                rows = Iterators.filter(rows, new MaxVersionsPredicate(maxVersions));

                try {
                    if (layout.getFamilyMap().get(column.getFamily()).isMapType()) {
                        // Map-type family
                        final Function<Row, KijiCell<T>> decoder = new MapFamilyDecoder<>(tableName,
                                translator.toCassandraColumnName(column), translator,
                                decoderProvider.<T>getDecoder(column));

                        return Iterators.transform(rows, decoder);
                    } else {
                        // Group-type family
                        final Function<Row, KijiCell<T>> decoder = new GroupFamilyDecoder<>(tableName,
                                translator.toCassandraColumnName(column), translator, decoderProvider);

                        // Group family decoder may return nulls, so filter them out
                        return Iterators.filter(Iterators.transform(rows, decoder), Predicates.notNull());
                    }
                } catch (NoSuchColumnException e) {
                    // Keep the original exception as the cause so the failing lookup stays visible.
                    throw new IllegalStateException(
                            String.format("Column %s does not exist in Kiji table %s.", column, layout.getName()),
                            e);
                }
            }
        };
    }

    /**
     * Create a new qualified column result set decoder function.
     *
     * @param column The Kiji column of the Row.
     * @param decoderProvider A cell decoder provider for the table.
     * @param <T> The value type in the column.
     * @return A function to convert a {@link ResultSet} containing a qualified column to cells.
     */
    public static <T> Function<ResultSet, Iterator<KijiCell<T>>> getQualifiedColumnDecoderFunction(
            final KijiColumnName column, final CellDecoderProvider decoderProvider) {
        return new Function<ResultSet, Iterator<KijiCell<T>>>() {
            /** {@inheritDoc} */
            @Override
            public Iterator<KijiCell<T>> apply(final ResultSet resultSet) {
                // Rows are decoded without any min/max timestamp or max versions filtering, because
                // the CQL statement for qualified gets only selects the required cells.
                final KijiCellDecoder<T> cellDecoder = decoderProvider.<T>getDecoder(column);
                final Iterator<Row> rows = resultSet.iterator();
                return Iterators.transform(rows, new QualifiedColumnDecoder<T>(column, cellDecoder));
            }
        };
    }

    /**
     * Get a function for decoding row keys and tokens from Cassandra rows.
     *
     * <p>
     *   The decoder implementation is selected by the row key encoding of the table layout.
     * </p>
     *
     * @param layout The table layout.
     * @return A function to decode row keys and tokens for the table.
     * @throws IllegalArgumentException if the row key encoding is neither raw nor formatted.
     */
    public static Function<Row, TokenRowKeyComponents> getRowKeyDecoderFunction(final KijiTableLayout layout) {
        final RowKeyFormat2 format = (RowKeyFormat2) layout.getDesc().getKeysFormat();

        final Function<Row, TokenRowKeyComponents> rowKeyDecoder;
        switch (format.getEncoding()) {
        case RAW:
            rowKeyDecoder = new RawRowKeyDecoder(layout);
            break;
        case FORMATTED:
            rowKeyDecoder = new FormattedRowKeyDecoder(layout);
            break;
        default:
            throw new IllegalArgumentException(
                    String.format("Unknown row key encoding %s.", format.getEncoding()));
        }
        return rowKeyDecoder;
    }

    /**
     * Get a function for converting {@link TokenRowKeyComponents} to {@link EntityId}s.
     *
     * @param table The Kiji table the row keys belong to.
     * @return A function which maps each {@link TokenRowKeyComponents} to the {@link EntityId}
     *     of its components in the given table.
     */
    public static Function<TokenRowKeyComponents, EntityId> getEntityIdFunction(final KijiTable table) {
        final Function<TokenRowKeyComponents, EntityId> toEntityId = new RowKeyComponentsToEntityId(table);
        return toEntityId;
    }

    /**
     * A function which will decode {@link Row}s from a map-type family.
     *
     * <p>
     *   This function may apply optimizations that make it only suitable to decode {@code Row}s
     *   from the specified map-type family, so do not use it over {@code Row}s from another
     *   family.
     * </p>
     *
     * <p>
     *   Instances keep the most recently translated column between calls and are therefore not
     *   safe for concurrent use.
     * </p>
     *
     * @param <T> type of value in the family.
     */
    @NotThreadSafe
    private static final class MapFamilyDecoder<T> implements Function<Row, KijiCell<T>> {
        private final CassandraTableName mTableName;
        private final CassandraColumnName mFamilyColumn;
        private final KijiCellDecoder<T> mCellDecoder;
        private final CassandraColumnNameTranslator mColumnTranslator;

        /** Kiji column translated from {@link #mLastQualifier}; {@code null} before the first row. */
        private KijiColumnName mLastColumn = null;
        /** Qualifier of the most recently translated row; {@code null} before the first row. */
        private ByteBuffer mLastQualifier = null;

        /**
         * Create a map-family column decoder.
         *
         * @param tableName The Cassandra table name.
         * @param familyColumn The Cassandra column name of the family.
         * @param columnTranslator The column translator for the table.
         * @param decoder The cell decoder used for every cell of the family.
         */
        public MapFamilyDecoder(final CassandraTableName tableName, final CassandraColumnName familyColumn,
                final CassandraColumnNameTranslator columnTranslator, final KijiCellDecoder<T> decoder) {
            mTableName = tableName;
            mFamilyColumn = familyColumn;
            mColumnTranslator = columnTranslator;
            mCellDecoder = decoder;
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   The {@code KijiColumnName} of the previous row is reused while the qualifier stays the
         *   same. This saves parsing and allocations of the column name for the common case of
         *   iterating through multiple versions of each column in the family.
         * </p>
         *
         * @param row to decode.
         * @return the decoded KijiCell.
         */
        @Override
        public KijiCell<T> apply(final Row row) {
            final ByteBuffer rowQualifier = row.getBytes(CQLUtils.QUALIFIER_COL);
            final boolean sameColumnAsLastRow = rowQualifier.equals(mLastQualifier);
            if (!sameColumnAsLastRow) {
                mLastQualifier = rowQualifier;
                mLastColumn = translateQualifier(row);
            }

            final long timestamp = row.getLong(CQLUtils.VERSION_COL);
            final byte[] encodedValue = ByteUtils.toBytes(row.getBytes(CQLUtils.VALUE_COL));

            try {
                return KijiCell.create(mLastColumn, timestamp, mCellDecoder.decodeCell(encodedValue));
            } catch (IOException ioe) {
                throw new KijiIOException(ioe);
            }
        }

        /**
         * Translate the qualifier of a row into the Kiji column it belongs to.
         *
         * @param row The row whose qualifier should be translated.
         * @return The Kiji column name for the row's qualifier within this decoder's family.
         */
        private KijiColumnName translateQualifier(final Row row) {
            try {
                return mColumnTranslator.toKijiColumnName(mTableName, new CassandraColumnName(
                        mFamilyColumn.getFamily(), ByteUtils.toBytes(row.getBytes(CQLUtils.QUALIFIER_COL))));
            } catch (NoSuchColumnException nsce) {
                // There should be no columns that we can't decode, so this signals a logic error
                throw new InternalKijiError(nsce);
            }
        }
    }

    /**
     * A function which will decode {@link Row}s from a group-type family. If a column is read which
     * has been dropped, then this function will return null.
     *
     * <p>
     *   This function may use optimizations that make it only suitable to decode {@code Row}s
     *   from the specified group-type family, so do not use it over {@code Row}s from another
     *   family.
     * </p>
     *
     * <p>
     *   Instances keep the most recently resolved column and decoder between calls and are
     *   therefore not safe for concurrent use.
     * </p>
     *
     * @param <T> type of value in the family.
     */
    @NotThreadSafe
    private static final class GroupFamilyDecoder<T> implements Function<Row, KijiCell<T>> {
        private final CassandraTableName mTableName;
        private final CellDecoderProvider mDecoderProvider;
        private final CassandraColumnNameTranslator mColumnTranslator;
        private final CassandraColumnName mFamilyColumn;

        // Cache for the most recently seen qualifier; all three are null when nothing is cached.
        private KijiCellDecoder<T> mLastDecoder;
        private KijiColumnName mLastColumn;
        private ByteBuffer mLastQualifier;

        /**
         * Create a group-family decoder for the provided family.
         *
         * @param tableName The Cassandra table name.
         * @param familyColumn The Cassandra column name of the family.
         * @param columnTranslator The column translator for the table.
         * @param decoderProvider A cell decoder provider for the table.
         */
        public GroupFamilyDecoder(final CassandraTableName tableName, final CassandraColumnName familyColumn,
                final CassandraColumnNameTranslator columnTranslator, final CellDecoderProvider decoderProvider) {
            mFamilyColumn = familyColumn;
            mColumnTranslator = columnTranslator;
            mDecoderProvider = decoderProvider;
            mTableName = tableName;
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   The {@code KijiCellDecoder} and {@code KijiColumnName} of the previous row are reused
         *   while the qualifier stays the same. This saves lookups (of the decoder) and allocations
         *   (of the column name) for the common case of iterating through the versions of a column
         *   in the family.
         * </p>
         *
         * TODO: We know that all of the KijiCell's decoded from this function always have the same
         * Kiji family, so we should not decode it. Currently the CassandraColumnNameTranslator does not
         * support this.
         *
         * @param row The row to decode.
         * @return the decoded KijiCell, or {@code null} if the row's column can not be translated.
         */
        @Override
        public KijiCell<T> apply(final Row row) {
            final ByteBuffer rowQualifier = row.getBytes(CQLUtils.QUALIFIER_COL);

            // Only re-resolve the column when the qualifier changed; bail out on unknown columns.
            if (!rowQualifier.equals(mLastQualifier) && !cacheColumn(rowQualifier)) {
                return null;
            }

            final long timestamp = row.getLong(CQLUtils.VERSION_COL);
            final byte[] encodedValue = ByteUtils.toBytes(row.getBytes(CQLUtils.VALUE_COL));

            try {
                return KijiCell.create(mLastColumn, timestamp, mLastDecoder.decodeCell(encodedValue));
            } catch (IOException ioe) {
                throw new KijiIOException(ioe);
            }
        }

        /**
         * Resolve the Kiji column and cell decoder for a qualifier and store them in the cache.
         * The cache is cleared if the qualifier does not map to a known column.
         *
         * @param qualifier The qualifier read from the current row.
         * @return whether the column was resolved and cached.
         */
        private boolean cacheColumn(final ByteBuffer qualifier) {
            try {
                // The cached key is a duplicate taken before the qualifier is converted to bytes.
                mLastQualifier = qualifier.duplicate();
                mLastColumn = mColumnTranslator.toKijiColumnName(mTableName,
                        new CassandraColumnName(mFamilyColumn.getFamily(), ByteUtils.toBytes(qualifier)));
                mLastDecoder = mDecoderProvider.getDecoder(mLastColumn);
                return true;
            } catch (NoSuchColumnException nsce) {
                // This can happen when a column is dropped from the group-family layout
                mLastDecoder = null;
                mLastColumn = null;
                mLastQualifier = null;
                return false;
            }
        }
    }

    /**
     * A function which will decode {@link Row}s from a qualified column.
     *
     * <p>
     *   The column may be from either a map-type or group-type family.
     * </p>
     *
     * <p>
     *   Every decoded cell is labelled with the column name given at construction, so this
     *   function is only suitable to decode {@code Row}s from the specified column; do not use
     *   it over {@code Row}s from another column.
     * </p>
     *
     * @param <T> type of value in the column.
     */
    @Immutable
    private static final class QualifiedColumnDecoder<T> implements Function<Row, KijiCell<T>> {
        private final KijiCellDecoder<T> mCellDecoder;
        private final KijiColumnName mColumnName;

        /**
         * Create a qualified column decoder for the provided column.
         *
         * @param columnName The Kiji column which all decoded rows belong to.
         * @param cellDecoder The cell decoder for the column.
         */
        public QualifiedColumnDecoder(final KijiColumnName columnName, final KijiCellDecoder<T> cellDecoder) {
            mColumnName = columnName;
            mCellDecoder = cellDecoder;
        }

        /** {@inheritDoc} */
        @Override
        public KijiCell<T> apply(final Row row) {
            final byte[] encodedValue = ByteUtils.toBytes(row.getBytes(CQLUtils.VALUE_COL));
            try {
                final DecodedCell<T> value = mCellDecoder.decodeCell(encodedValue);
                final long timestamp = row.getLong(CQLUtils.VERSION_COL);
                return KijiCell.create(mColumnName, timestamp, value);
            } catch (IOException ioe) {
                throw new KijiIOException(ioe);
            }
        }
    }

    /**
     * A pair of a Cassandra token and the Kiji row key components it was read with.
     */
    @Immutable
    public static class TokenRowKeyComponents {
        private final long mToken;
        private final KijiRowKeyComponents mComponents;

        /**
         * Create a token, row key components tuple.
         *
         * @param token The token.
         * @param components The components.
         */
        public TokenRowKeyComponents(final long token, final KijiRowKeyComponents components) {
            mComponents = components;
            mToken = token;
        }

        /**
         * Get the token.
         *
         * @return The token.
         */
        public long getToken() {
            return mToken;
        }

        /**
         * Get the components.
         *
         * @return The components.
         */
        public KijiRowKeyComponents getComponents() {
            return mComponents;
        }

        /** {@inheritDoc} */
        @Override
        public int hashCode() {
            return Objects.hashCode(mToken, mComponents);
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   Two tuples are equal when they are of the same class and both their tokens and their
         *   components are equal.
         * </p>
         */
        @Override
        public boolean equals(final Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj != null && obj.getClass() == getClass()) {
                final TokenRowKeyComponents that = (TokenRowKeyComponents) obj;
                return mToken == that.mToken && Objects.equal(mComponents, that.mComponents);
            }
            return false;
        }
    }

    /**
     * A comparator for {@link TokenRowKeyComponents}.
     *
     * <p>
     *   Tuples are ordered by token first; tuples with equal tokens are ordered by their row key
     *   components.
     * </p>
     */
    @Immutable
    public static final class TokenRowKeyComponentsComparator implements Comparator<TokenRowKeyComponents> {
        private static final TokenRowKeyComponentsComparator INSTANCE = new TokenRowKeyComponentsComparator();

        /**
         * Get an instance of the comparator.
         *
         * @return An instance of the comparator.
         */
        public static TokenRowKeyComponentsComparator getInstance() {
            return INSTANCE;
        }

        /** Private constructor; use {@link #getInstance()}. */
        private TokenRowKeyComponentsComparator() {
        }

        /** {@inheritDoc} */
        @Override
        public int compare(final TokenRowKeyComponents a, final TokenRowKeyComponents b) {
            // Tokens span the whole long range, so subtracting them can overflow and narrowing the
            // difference to an int can flip its sign or turn it into 0. Compare them directly.
            final int tokenCompare = Long.compare(a.getToken(), b.getToken());
            if (tokenCompare != 0) {
                return tokenCompare;
            }
            return a.getComponents().compareTo(b.getComponents());
        }
    }

    /**
     * Decodes a Cassandra row containing the token and a raw row key column into a
     * {@link TokenRowKeyComponents}.
     */
    @Immutable
    private static final class RawRowKeyDecoder implements Function<Row, TokenRowKeyComponents> {
        private final String mTokenColumn;

        /**
         * Create a row key decoder for a raw row key format table.
         *
         * @param layout The layout of the table.
         */
        private RawRowKeyDecoder(final KijiTableLayout layout) {
            mTokenColumn = CQLUtils.getTokenColumn(layout);
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   The raw row key bytes become the single component of the decoded row key.
         * </p>
         */
        @Override
        public TokenRowKeyComponents apply(final Row row) {
            // The token is a long, and is read as one by the formatted row key decoder as well;
            // reading it as an int does not match the type carried by TokenRowKeyComponents.
            final long token = row.getLong(mTokenColumn);
            final Object[] components = new Object[] { ByteUtils.toBytes(row.getBytes(CQLUtils.RAW_KEY_COL)) };
            return new TokenRowKeyComponents(token, KijiRowKeyComponents.fromComponents(components));
        }
    }

    /**
     * Decodes a Cassandra row containing the token and row key component columns into a
     * {@link TokenRowKeyComponents}.
     */
    @Immutable
    private static final class FormattedRowKeyDecoder implements Function<Row, TokenRowKeyComponents> {

        private final RowKeyFormat2 mKeyFormat;
        private final String mTokenColumn;

        /**
         * Create a new {@code FormattedRowKeyDecoder}.
         *
         * @param layout The table layout.
         */
        private FormattedRowKeyDecoder(final KijiTableLayout layout) {
            mKeyFormat = (RowKeyFormat2) layout.getDesc().getKeysFormat();
            mTokenColumn = CQLUtils.getTokenColumn(layout);
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   One value is read per component of the row key format, in the order the components
         *   are declared.
         * </p>
         */
        @Override
        public TokenRowKeyComponents apply(final Row row) {
            final List<RowKeyComponent> keySpec = mKeyFormat.getComponents();
            final Object[] values = new Object[keySpec.size()];

            int index = 0;
            for (final RowKeyComponent spec : keySpec) {
                values[index] = readComponent(row, spec);
                index += 1;
            }

            final long token = row.getLong(mTokenColumn);
            return new TokenRowKeyComponents(token, KijiRowKeyComponents.fromComponents(values));
        }

        /**
         * Read the value of a single row key component from a row.
         *
         * @param row The row to read from.
         * @param spec The row key component to read.
         * @return The component value as a {@code String}, {@code Integer} or {@code Long}.
         */
        private static Object readComponent(final Row row, final RowKeyComponent spec) {
            // TODO: investigate whether we can do this by position instead of creating a bunch of
            // garbage through column name translation
            final String columnName = CQLUtils.translateEntityIDComponentNameToColumnName(spec.getName());
            switch (spec.getType()) {
            case STRING:
                return row.getString(columnName);
            case INTEGER:
                return row.getInt(columnName);
            case LONG:
                return row.getLong(columnName);
            default:
                throw new IllegalArgumentException("Unknown row key component type.");
            }
        }
    }

    /**
     * A function for converting {@link TokenRowKeyComponents} to {@link EntityId}s. The token of
     * the input is not used by the conversion.
     */
    private static class RowKeyComponentsToEntityId implements Function<TokenRowKeyComponents, EntityId> {

        private final KijiTable mTable;

        /**
         * Create a new function for converting a {@link TokenRowKeyComponents} to an {@link EntityId}.
         * The table must not be closed while the function could still evaluate.
         *
         * @param table The table the row key belongs to.
         */
        public RowKeyComponentsToEntityId(final KijiTable table) {
            mTable = table;
        }

        /** {@inheritDoc} */
        @Override
        public EntityId apply(final TokenRowKeyComponents input) {
            final KijiRowKeyComponents rowKey = input.getComponents();
            return rowKey.getEntityIdForTable(mTable);
        }
    }

    /**
     * A predicate to filter excess Kiji Cells of a column from a Cassandra result set.
     *
     * <p>
     *   The predicate counts consecutive rows which share a family and qualifier, and rejects
     *   rows once the count for the current column exceeds the limit. The counter restarts
     *   whenever the family or the qualifier differs from the previous row.
     * </p>
     */
    @NotThreadSafe
    private static final class MaxVersionsPredicate implements Predicate<Row> {
        private final int mMaxVersions;

        private int mCurrentCount = 0;
        private ByteBuffer mCurrentFamily = null;
        private ByteBuffer mCurrentQualifier = null;

        /**
         * Create a new column limit predicate.
         *
         * @param maxVersions The number of cells from each column to limit to.
         */
        private MaxVersionsPredicate(final int maxVersions) {
            mMaxVersions = maxVersions;
        }

        /** {@inheritDoc} */
        @Override
        public boolean apply(final Row row) {
            final ByteBuffer rowFamily = row.getBytes(CQLUtils.FAMILY_COL);
            final ByteBuffer rowQualifier = row.getBytes(CQLUtils.QUALIFIER_COL);

            final boolean familyChanged = !rowFamily.equals(mCurrentFamily);
            if (familyChanged || !rowQualifier.equals(mCurrentQualifier)) {
                // A new column starts here: remember it and restart the version count.
                if (familyChanged) {
                    mCurrentFamily = rowFamily;
                }
                mCurrentQualifier = rowQualifier;
                mCurrentCount = 0;
            }

            mCurrentCount++;
            return mCurrentCount <= mMaxVersions;
        }
    }

    /**
     * A predicate to filter Kiji cells below a minimum timestamp (inclusive). Rows whose version
     * is greater than or equal to the minimum timestamp are accepted.
     */
    @Immutable
    private static final class MinTimestampPredicate implements Predicate<Row> {

        private final long mTimestamp;

        /**
         * Create a new minimum timestamp predicate.
         *
         * @param timestamp The minimum timestamp.
         */
        private MinTimestampPredicate(final long timestamp) {
            mTimestamp = timestamp;
        }

        /** {@inheritDoc} */
        @Override
        public boolean apply(final Row row) {
            return row.getLong(CQLUtils.VERSION_COL) >= mTimestamp;
        }

        /** {@inheritDoc} */
        @Override
        public int hashCode() {
            return Objects.hashCode(mTimestamp);
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   Two minimum timestamp predicates are equal when they hold the same timestamp.
         * </p>
         */
        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            // No super.equals(obj) check: the superclass is Object, whose equals is an identity
            // test that would reject every distinct instance and make this comparison unreachable.
            final MinTimestampPredicate other = (MinTimestampPredicate) obj;
            return this.mTimestamp == other.mTimestamp;
        }
    }

    /**
     * A predicate to filter Kiji cells above a maximum timestamp (exclusive). Rows whose version
     * is strictly less than the maximum timestamp are accepted.
     */
    @Immutable
    private static final class MaxTimestampPredicate implements Predicate<Row> {

        private final long mTimestamp;

        /**
         * Create a new maximum timestamp predicate.
         *
         * @param timestamp The maximum timestamp.
         */
        private MaxTimestampPredicate(final long timestamp) {
            mTimestamp = timestamp;
        }

        /** {@inheritDoc} */
        @Override
        public boolean apply(final Row input) {
            return input.getLong(CQLUtils.VERSION_COL) < mTimestamp;
        }

        /** {@inheritDoc} */
        @Override
        public int hashCode() {
            return Objects.hashCode(mTimestamp);
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         *   Two maximum timestamp predicates are equal when they hold the same timestamp.
         * </p>
         */
        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            // No super.equals(obj) check: the superclass is Object, whose equals is an identity
            // test that would reject every distinct instance and make this comparison unreachable.
            final MaxTimestampPredicate other = (MaxTimestampPredicate) obj;
            return this.mTimestamp == other.mTimestamp;
        }
    }

    /** Private constructor: this utility class only offers static members. */
    private RowDecoders() {
        // Never instantiated.
    }
}