com.stratio.deep.cassandra.config.CassandraDeepJobConfig.java Source code

Java tutorial

Introduction

Here is the source code for com.stratio.deep.cassandra.config.CassandraDeepJobConfig.java

Source

/*
 * Copyright 2014, Stratio.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.stratio.deep.cassandra.config;

import static com.stratio.deep.cassandra.util.CassandraUtils.createTableQueryGenerator;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.BATCHSIZE;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.BISECT_FACTOR;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.CQLPORT;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.CREATE_ON_WRITE;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.PAGE_SIZE;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.READ_CONSISTENCY_LEVEL;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.RPCPORT;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.WRITE_CONSISTENCY_LEVEL;
import static com.stratio.deep.commons.utils.Utils.quote;

import java.io.Serializable;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.collections.map.HashedMap;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ColumnMetadata;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.KeyspaceMetadata;
import com.datastax.driver.core.Metadata;
import com.datastax.driver.core.ProtocolVersion;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.TableMetadata;
import com.stratio.deep.cassandra.filter.value.EqualsInValue;
import com.stratio.deep.commons.config.DeepJobConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.entity.Cell;
import com.stratio.deep.commons.entity.Cells;
import com.stratio.deep.commons.exception.DeepIOException;
import com.stratio.deep.commons.exception.DeepIndexNotFoundException;
import com.stratio.deep.commons.exception.DeepNoSuchFieldException;
import com.stratio.deep.commons.extractor.utils.ExtractorConstants;
import com.stratio.deep.commons.filter.Filter;
import com.stratio.deep.commons.utils.Constants;

import scala.Tuple2;

/**
 * Base class for all config implementations providing default implementations for methods defined in
 * {@link ICassandraDeepJobConfig}.
 */
public abstract class CassandraDeepJobConfig<T> extends DeepJobConfig<T, CassandraDeepJobConfig<T>>
        implements AutoCloseable, ICassandraDeepJobConfig<T> {

    /**
     * Class-wide log4j logger.
     */
    private static final Logger LOG = Logger.getLogger(CassandraDeepJobConfig.class);

    private static final long serialVersionUID = -7179376653643603038L;

    /**
     * Fully qualified class name of the partitioner, as returned by getPartitionerClassName(). Defaults to the
     * Murmur3 partitioner.
     */
    private String partitionerClassName = "org.apache.cassandra.dht.Murmur3Partitioner";

    /**
     * Cassandra server RPC port.
     */
    private Integer rpcPort = Constants.DEFAULT_CASSANDRA_RPC_PORT;

    /**
     * Cassandra server CQL port. getSession() uses it both to connect and as part of the session cache key.
     */
    private Integer cqlPort = Constants.DEFAULT_CASSANDRA_CQL_PORT;

    /**
     * default "where" filter to use to access ColumnFamily's data. No method of this class adds entries to it;
     * it is read by the validation code and exposed read-only through getAdditionalFilters().
     */
    private final Map<String, Serializable> additionalFilters = new TreeMap<>();

    /**
     * Size of the batch created when writing to Cassandra.
     */
    private int batchSize = Constants.DEFAULT_BATCH_SIZE;

    /**
     * holds columns metadata fetched from Cassandra. Populated lazily by columnDefinitions(); transient, so it
     * is not part of the serialized form.
     */
    private transient Map<String, Cell> columnDefinitionMap;

    /**
     * Default read consistency level. Defaults to LOCAL_ONE.
     */
    private String readConsistencyLevel = ConsistencyLevel.LOCAL_ONE.name();

    /**
     * Default write consistency level. Defaults to QUORUM.
     */
    private String writeConsistencyLevel = ConsistencyLevel.QUORUM.name();

    /**
     * Enables/Disables auto-creation of column family when writing to Cassandra. Defaults to TRUE, i.e. a missing
     * output column family is created on write.
     */
    protected Boolean createTableOnWrite = Boolean.TRUE;

    /**
     * Session handed in through session(Session) or opened by getSession(); this is the session that close()
     * closes. Transient, so it is not part of the serialized form.
     */
    private transient Session session;

    /**
     * Set to TRUE at the end of a successful initialize().
     */
    private Boolean isInitialized = Boolean.FALSE;

    /**
     * Page size; validate() rejects values lower than or equal to zero.
     */
    private int pageSize = Constants.DEFAULT_PAGE_SIZE;

    /**
     * Whether this configuration is meant for writing. Defaults to TRUE; createTableOnWrite(Boolean) overwrites
     * it with the value it receives.
     */
    protected Boolean isWriteConfig = Boolean.TRUE;

    /**
     * Bisect factor; when different from the default it must be a power of two (see validate()).
     */
    private int bisectFactor = Constants.DEFAULT_BISECT_FACTOR;

    /**
     * Split size. Final: splitSize(int) currently does not modify it.
     */
    private final int splitSize = Constants.DEFAULT_SPLIT_SIZE;

    /**
     * True when the split mode is selected. Defaults to false; no method of this class sets it to true.
     */
    private boolean isSplitModeSet = false;

    /**
     * True when the bisect mode is selected. Defaults to true.
     */
    private boolean isBisectModeSet = true;

    /**
     * Optional "equals-in" filter value, set through setEqualsInValue(EqualsInValue) or
     * initialize(ExtractorConfig).
     */
    private EqualsInValue equalsInValue = null;

    /**
     * {@inheritDoc}
     * <p>
     * Stores the given session in this object; it is the session that close() will close.
     * NOTE(review): getSession() returns a session taken from the static cache rather than this field, so a
     * session injected here does not appear to be handed back by getSession() - confirm this is intended.
     */
    @Override
    public CassandraDeepJobConfig<T> session(Session session) {
        this.session = session;
        return this;
    }

    /**
     * Cache of the sessions opened by getSession(), keyed by "host:cqlPort". It is static and therefore shared
     * by every configuration object; all accesses must hold the monitor of the map itself.
     */
    private static final Map<String, Session> cassandraSession = new HashMap<>();

    /**
     * {@inheritDoc}
     * <p>
     * Returns the cached session for this object's "host:cqlPort" pair, opening (and caching) a new one when
     * there is none yet or when the cached one has been closed. A newly opened session is also stored in this
     * object so that close() can release it.
     * <p>
     * NOTE(review): the cache key does not include the keyspace, although the session is connected to the
     * keyspace of the configuration object that created it.
     */
    @Override
    public synchronized Session getSession() {
        String id = this.getHost() + ":" + this.cqlPort;

        // The cache is static: the instance monitor alone does not protect it from other config objects.
        synchronized (cassandraSession) {
            Session cached = cassandraSession.get(id);

            if (cached == null || cached.isClosed()) {
                Cluster cluster = Cluster.builder().withPort(this.cqlPort).addContactPoint(this.getHost())
                        .withCredentials(this.username, this.password).withProtocolVersion(PROTOCOL_VERSION).build();

                try {
                    cached = cluster.connect(quote(this.catalog));
                } catch (RuntimeException e) {
                    // do not leak the cluster resources when the connection cannot be established
                    cluster.close();
                    throw e;
                }

                session = cached;
                cassandraSession.put(id, cached);
            }

            return cached;
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Closes the session held by this object, if any, and removes it from the static session cache so that
     * getSession() does not hand out a closed session afterwards. Calling it again is a no-op.
     */
    @Override
    public synchronized void close() {
        LOG.debug("closing " + getClass().getCanonicalName());

        Session current = session;
        if (current == null) {
            return;
        }

        // evict before closing: the cache is shared by every configuration object
        synchronized (cassandraSession) {
            cassandraSession.values().remove(current);
        }

        session = null;
        current.close();
    }

    /**
     * Creates a configuration object for the given entity class; the class is simply forwarded to the
     * superclass constructor.
     *
     * @param entityClass the entity class handled by this configuration.
     */
    public CassandraDeepJobConfig(Class<T> entityClass) {
        super(entityClass);
    }

    /**
     * {@inheritDoc}
     *
     * @Override protected void finalize() { LOG.debug("finalizing " + getClass().getCanonicalName()); close(); }
     */
    /**
     * Makes sure this configuration object has been initialized.
     * <p>
     * When it has not, this method does not fail: it calls {@link #initialize()} itself and then logs a
     * warning. Any exception thrown by {@link #initialize()} propagates to the caller.
     */
    protected void checkInitialized() {
        if (!isInitialized) {
            initialize();
            // logged only after the lazy initialization above completed without throwing
            LOG.warn("CassandraDeepJobConfig has not been initialized!");
        }
    }

    /**
     * Looks up the metadata of the configured table through the DataStax java driver, using the quoted
     * keyspace and table names.
     *
     * @return the table metadata as returned by the driver, or null when the driver has no metadata for the
     * configured keyspace.
     */
    public TableMetadata fetchTableMetadata() {
        KeyspaceMetadata keyspace = getSession().getCluster().getMetadata().getKeyspace(quote(this.catalog));

        return keyspace == null ? null : keyspace.getTable(quote(this.table));
    }

    /**
     * Creates the output column family if not exists. <br/>
     * We first check if the column family exists. <br/>
     * If not, we use <i>first</i> as a template to get columns metadata: its first element provides the key
     * structure and its second element the remaining columns.
     * <p>
     * This is a very heavy operation since to obtain the schema we need to get at least one element of the output RDD.
     * </p>
     * <p>
     * The existence check goes through {@link #fetchTableMetadata()}, so the keyspace name is quoted exactly as
     * in the rest of this class and a keyspace unknown to the driver is treated as "table not found" instead of
     * failing with a NullPointerException.
     * </p>
     *
     * @param first the first element of the pair RDD to write.
     * @throws DeepIOException          if the table does not exist and 'createTableOnWrite' is false.
     * @throws DeepNoSuchFieldException if the table must be created but <i>first</i> carries no key structure.
     */
    public void createOutputTableIfNeeded(Tuple2<Cells, Cells> first) {

        TableMetadata metadata = fetchTableMetadata();

        if (metadata != null) {
            // nothing to do, the output table is already there
            return;
        }

        if (!createTableOnWrite) {
            throw new DeepIOException("Cannot write RDD, output table does not exists and configuration object has "
                    + "'createTableOnWrite' = false");
        }

        if (first == null || first._1() == null || first._1().isEmpty()) {
            throw new DeepNoSuchFieldException("no key structure found on row metadata");
        }

        String createTableQuery = createTableQueryGenerator(first._1(), first._2(), this.catalog,
                quote(this.table));
        getSession().execute(createTableQuery);
        waitForNewTableMetadata();
    }

    /**
     * Waits until the driver exposes metadata for the configured table.
     * <p>
     * The metadata is polled up to 10 times, sleeping 100 milliseconds after each unsuccessful attempt, and
     * checked one last time after the final sleep.
     *
     * @throws DeepIOException if the metadata is still missing after all the attempts, or if the waiting
     *                         thread is interrupted (the interrupt flag is restored before throwing).
     */
    private void waitForNewTableMetadata() {
        final int waitTime = 100;
        final int maxRetries = 10;

        for (int retries = 0; retries < maxRetries; retries++) {
            if (fetchTableMetadata() != null) {
                return;
            }

            LOG.warn(String.format("Metadata for new table %s.%s NOT FOUND, waiting %d millis", this.catalog,
                    this.table, waitTime));
            try {
                Thread.sleep(waitTime);
            } catch (InterruptedException e) {
                LOG.error("Sleep interrupted", e);
                // keep the interruption visible to the caller instead of silently spinning on
                Thread.currentThread().interrupt();
                throw new DeepIOException("Interrupted while waiting for metadata of the newly created CF ");
            }
        }

        // the table may have shown up during the last sleep
        if (fetchTableMetadata() == null) {
            throw new DeepIOException("Cannot retrieve metadata for the newly created CF ");
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * The definitions are fetched from Cassandra on the first successful call and cached afterwards. Returns
     * null when no metadata is available for the table; in that case a warning is logged if
     * 'createTableOnWrite' is false.
     */
    @Override
    public synchronized Map<String, Cell> columnDefinitions() {
        if (columnDefinitionMap == null) {
            TableMetadata fetched = fetchTableMetadata();

            if (fetched == null) {
                if (!createTableOnWrite) {
                    LOG.warn("Configuration not suitable for writing RDD: output table does not exists and configuration "
                            + "object has 'createTableOnWrite' = false");
                }

                return null;
            }

            initColumnDefinitionMap(fetched);
        }

        return columnDefinitionMap;
    }

    /**
     * Builds the column definition map out of the given table metadata and stores it, wrapped as an
     * unmodifiable map, in {@code columnDefinitionMap}.
     * <p>
     * Partition key columns are registered first, then clustering columns, then every remaining column. The
     * map is assembled locally and published only once complete, so a failure half way through never leaves
     * a partially filled map behind for columnDefinitions() to return.
     *
     * @param tableMetadata the metadata of the table, must not be null.
     */
    private void initColumnDefinitionMap(TableMetadata tableMetadata) {
        Map<String, Cell> definitions = new HashMap<>();

        for (ColumnMetadata partitionKey : tableMetadata.getPartitionKey()) {
            definitions.put(partitionKey.getName(),
                    Cell.create(partitionKey.getName(), partitionKey.getType(), Boolean.TRUE, Boolean.FALSE));
        }

        for (ColumnMetadata clusteringKey : tableMetadata.getClusteringColumns()) {
            definitions.put(clusteringKey.getName(),
                    Cell.create(clusteringKey.getName(), clusteringKey.getType(), Boolean.FALSE, Boolean.TRUE));
        }

        for (ColumnMetadata column : tableMetadata.getColumns()) {
            // key columns were already registered with their key flags, do not overwrite them
            if (!definitions.containsKey(column.getName())) {
                definitions.put(column.getName(),
                        Cell.create(column.getName(), column.getType(), Boolean.FALSE, Boolean.FALSE));
            }
        }

        columnDefinitionMap = Collections.unmodifiableMap(definitions);
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#columnFamily(java.lang.String)
     */

    // Stores the column family name in the 'table' field and returns this object for chaining.
    @Override
    public CassandraDeepJobConfig<T> columnFamily(String columnFamily) {
        this.table = columnFamily;

        return this;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#columnFamily(java.lang.String)
     */

    // Alias of columnFamily(String): simply delegates to it.
    @Override
    public CassandraDeepJobConfig<T> table(String table) {
        return columnFamily(table);
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#getColumnFamily()
     */

    // Returns the 'table' field. checkInitialized() runs first, so an uninitialized configuration gets
    // initialized (and validated) by this call.
    @Override
    public String getColumnFamily() {
        checkInitialized();
        return table;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#getColumnFamily()
     */

    // Alias of getColumnFamily(): simply delegates to it.
    @Override
    public String getTable() {
        return getColumnFamily();
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#getKeyspace()
     */

    // Returns the 'catalog' field, which is where keyspace(String) stores the keyspace name.
    // checkInitialized() runs first, so an uninitialized configuration gets initialized by this call.
    @Override
    public String getKeyspace() {
        checkInitialized();
        return catalog;
    }

    // Returns the configured partitioner class name. checkInitialized() runs first, so an uninitialized
    // configuration gets initialized by this call.
    @Override
    public String getPartitionerClassName() {
        checkInitialized();
        return partitionerClassName;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#getPassword()
     */

    // Returns the 'password' field. checkInitialized() runs first, so an uninitialized configuration gets
    // initialized by this call.
    @Override
    public String getPassword() {
        checkInitialized();
        return password;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.stratio.deep.cassandra.config.ICassandraDeepJobConfig#getRpcPort()
     */

    // Returns the configured RPC port. checkInitialized() runs first, so an uninitialized configuration gets
    // initialized by this call.
    @Override
    public Integer getRpcPort() {
        checkInitialized();
        return rpcPort;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the configured CQL port. checkInitialized() runs first, so an uninitialized configuration gets
     * initialized by this call.
     */

    @Override
    public Integer getCqlPort() {
        checkInitialized();
        return cqlPort;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Does nothing when the object is already initialized. Otherwise it falls back to the local host name when
     * no host has been configured, validates the configuration, loads the column definitions and finally
     * marks the object as initialized.
     */

    @Override
    public CassandraDeepJobConfig<T> initialize() {
        if (!isInitialized) {
            if (StringUtils.isEmpty(getHost())) {
                // no host configured: default to this machine
                try {
                    host.add(InetAddress.getLocalHost().getCanonicalHostName());
                } catch (UnknownHostException e) {
                    LOG.warn("Cannot resolve local host canonical name, using \"localhost\"");
                    host.add(InetAddress.getLoopbackAddress().getCanonicalHostName());
                }
            }

            validate();
            columnDefinitions();

            isInitialized = Boolean.TRUE;
        }

        return this;
    }

    /**
     * Configures this object out of the given extractor configuration and then initializes it.
     * <p>
     * After delegating to the superclass, every Cassandra specific property that has a non-null value in the
     * extractor configuration is copied through the matching fluent setter; properties without a value keep
     * their current setting. Handling of ExtractorConstants.FILTER_FIELD is currently disabled.
     *
     * @param extractorConfig the extractor configuration to read the properties from.
     * @return this object.
     */
    @Override
    public CassandraDeepJobConfig<T> initialize(ExtractorConfig extractorConfig) {
        super.initialize(extractorConfig);

        final Map<String, Serializable> settings = extractorConfig.getValues();

        // ports and sizing
        if (settings.get(BATCHSIZE) != null) {
            batchSize(extractorConfig.getInteger(BATCHSIZE));
        }
        if (settings.get(CQLPORT) != null) {
            cqlPort(extractorConfig.getInteger(CQLPORT));
        }
        if (settings.get(RPCPORT) != null) {
            rpcPort(extractorConfig.getInteger(RPCPORT));
        }
        if (settings.get(CREATE_ON_WRITE) != null) {
            createTableOnWrite(extractorConfig.getBoolean(CREATE_ON_WRITE));
        }
        if (settings.get(PAGE_SIZE) != null) {
            pageSize(extractorConfig.getInteger(PAGE_SIZE));
        }

        // consistency levels
        if (settings.get(READ_CONSISTENCY_LEVEL) != null) {
            readConsistencyLevel(extractorConfig.getString(READ_CONSISTENCY_LEVEL));
        }
        if (settings.get(WRITE_CONSISTENCY_LEVEL) != null) {
            writeConsistencyLevel(extractorConfig.getString(WRITE_CONSISTENCY_LEVEL));
        }

        if (settings.get(BISECT_FACTOR) != null) {
            bisectFactor(extractorConfig.getInteger(BISECT_FACTOR));
        }

        // filters
        if (settings.get(ExtractorConstants.FILTER_QUERY) != null) {
            filters(extractorConfig.getFilterArray(ExtractorConstants.FILTER_QUERY));
        }
        if (settings.get(ExtractorConstants.EQUALS_IN_FILTER) != null) {
            EqualsInValue equalsIn = (EqualsInValue) extractorConfig.getValue(EqualsInValue.class,
                    ExtractorConstants.EQUALS_IN_FILTER);
            setEqualsInValue(equalsIn);
        }

        this.initialize();

        return this;
    }

    // Stores the given filter array as is (no defensive copy) and returns this object for chaining.
    @Override
    public CassandraDeepJobConfig<T> filters(Filter... filters) {
        this.filters = filters;

        return this;
    }

    // Returns the stored filter array itself, not a copy; it is whatever filters(Filter...) last stored.
    @Override
    public Filter[] getFilters() {
        return filters;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The keyspace name is stored in the 'catalog' field.
     */

    @Override
    public CassandraDeepJobConfig<T> keyspace(String keyspace) {
        this.catalog = keyspace;
        return this;
    }

    // Sets the bisect factor and selects the bisect mode (the split mode flag is cleared). The value itself
    // is only checked later, by validate().
    @Override
    public CassandraDeepJobConfig<T> bisectFactor(int bisectFactor) {
        this.isSplitModeSet = false;
        this.isBisectModeSet = true;
        this.bisectFactor = bisectFactor;
        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Stores the given partitioner class name without checking it.
     */

    @Override
    public CassandraDeepJobConfig<T> partitioner(String partitionerClassName) {
        this.partitionerClassName = partitionerClassName;
        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The password is one of the credentials getSession() passes to the cluster builder.
     */

    @Override
    public CassandraDeepJobConfig<T> password(String password) {
        this.password = password;

        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * A null port is accepted here but rejected later by validate().
     */

    @Override
    public CassandraDeepJobConfig<T> rpcPort(Integer port) {
        this.rpcPort = port;

        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The CQL port is the port getSession() connects to and part of its session cache key.
     */

    @Override
    public CassandraDeepJobConfig<T> cqlPort(Integer port) {
        this.cqlPort = port;

        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The user name is one of the credentials getSession() passes to the cluster builder.
     */

    @Override
    public CassandraDeepJobConfig<T> username(String username) {
        this.username = username;

        return this;
    }

    /**
     * Validates if any of the mandatory fields have been configured or not.
     * <p>
     * The checks run in this order: connection parameters, page size, consistency levels, table metadata
     * (this step connects to Cassandra through {@link #fetchTableMetadata()}), additional filters and finally
     * the split/bisect mode.
     *
     * @throws IllegalArgumentException if any of the mandatory properties has not been configured or holds an
     *                                  invalid value.
     */
    void validate() {
        validateCassandraParams();

        if (pageSize <= 0) {
            throw new IllegalArgumentException("pageSize must be a positive integer");
        }

        validateConsistencyLevels();

        TableMetadata tableMetadata = fetchTableMetadata();

        validateTableMetadata(tableMetadata);
        validateAdditionalFilters(tableMetadata);

        validateSplitMode();
    }

    /**
     * Checks that exactly one of the bisect / split modes is selected and that its parameter is usable.
     *
     * @throws IllegalArgumentException if both or none of the modes are selected, if a non-default bisect
     *                                  factor is not a power of two, or if the split size is not positive.
     */
    private void validateSplitMode() {
        if (this.isBisectModeSet && this.isSplitModeSet) {
            throw new IllegalArgumentException(
                    "Only one split mode can be defined, please choose between Split or Bisect");
        }

        if (!this.isBisectModeSet && !this.isSplitModeSet) {
            throw new IllegalArgumentException(
                    "One split mode must be defined, please choose between Split or Bisect");
        }

        if (this.isBisectModeSet) {
            // the default bisect factor is accepted as is, any other value must be a power of two
            if (this.bisectFactor != Constants.DEFAULT_BISECT_FACTOR
                    && !this.checkIsPowerOfTwo(this.bisectFactor)) {
                throw new IllegalArgumentException(
                        "Bisect factor should be greater than zero and a power of 2");
            }
        } else if (this.splitSize <= 0) {
            throw new IllegalArgumentException("The split size must be a positive integer");
        }
    }

    /**
     * Checks that the connection parameters are present: host, RPC port, CQL port, keyspace and column family.
     * <p>
     * The CQL port is checked as well because getSession() unboxes it when connecting; without this check a
     * null CQL port would surface as a NullPointerException there.
     *
     * @throws IllegalArgumentException if any of the parameters is missing.
     */
    private void validateCassandraParams() {
        if (StringUtils.isEmpty(getHost())) {
            throw new IllegalArgumentException("host cannot be null");
        }

        if (rpcPort == null) {
            throw new IllegalArgumentException("rpcPort cannot be null");
        }

        if (cqlPort == null) {
            throw new IllegalArgumentException("cqlPort cannot be null");
        }

        if (StringUtils.isEmpty(catalog)) {
            throw new IllegalArgumentException("keyspace cannot be null");
        }

        if (StringUtils.isEmpty(table)) {
            throw new IllegalArgumentException("columnFamily cannot be null");
        }
    }

    /**
     * Checks the configuration against the metadata of the table.
     * <p>
     * A missing table is only acceptable for a write configuration that is allowed to create it. When input
     * columns have been configured, each of them must exist on the table; this requires the table to exist,
     * which is checked explicitly rather than with an {@code assert} (assertions are usually disabled at
     * runtime).
     *
     * @param tableMetadata the metadata of the table, null if the table does not exist.
     * @throws IllegalArgumentException if the table does not exist but is required.
     * @throws DeepNoSuchFieldException if one of the input columns does not exist on the table.
     */
    private void validateTableMetadata(TableMetadata tableMetadata) {

        if (tableMetadata == null && !isWriteConfig) {
            throw new IllegalArgumentException(
                    String.format("Column family {%s.%s} does not exist", catalog, table));
        }

        if (tableMetadata == null && !createTableOnWrite) {
            throw new IllegalArgumentException(String.format(
                    "Column family {%s.%s} does not exist and " + "createTableOnWrite = false", catalog, table));
        }

        if (ArrayUtils.isEmpty(inputColumns)) {
            return;
        }

        if (tableMetadata == null) {
            throw new IllegalArgumentException(String.format(
                    "Column family {%s.%s} does not exist, input columns cannot be validated", catalog, table));
        }

        for (String column : inputColumns) {
            ColumnMetadata columnMetadata = tableMetadata.getColumn(column);

            if (columnMetadata == null) {
                throw new DeepNoSuchFieldException("No column with name " + column + " has been found on table "
                        + this.catalog + "." + this.table);
            }
        }

    }

    /**
     * Checks that every column used in the additional filters exists on the table and has an index.
     * <p>
     * Nothing is checked when there are no additional filters. validate() may pass null metadata (table not
     * created yet), so that case is rejected explicitly instead of failing with a NullPointerException.
     *
     * @param tableMetadata the metadata of the table, null if the table does not exist.
     * @throws IllegalArgumentException    if filters are configured but the table does not exist.
     * @throws DeepNoSuchFieldException    if a filtered column does not exist on the table.
     * @throws DeepIndexNotFoundException if a filtered column has no index.
     */
    private void validateAdditionalFilters(TableMetadata tableMetadata) {
        if (additionalFilters.isEmpty()) {
            return;
        }

        if (tableMetadata == null) {
            throw new IllegalArgumentException(String.format(
                    "Column family {%s.%s} does not exist, additional filters cannot be validated", catalog, table));
        }

        for (Map.Entry<String, Serializable> entry : additionalFilters.entrySet()) {
            /* check if there's an index specified on the provided column */
            ColumnMetadata columnMetadata = tableMetadata.getColumn(entry.getKey());

            if (columnMetadata == null) {
                throw new DeepNoSuchFieldException("No column with name " + entry.getKey() + " has been found on "
                        + "table " + this.catalog + "." + this.table);
            }

            if (columnMetadata.getIndex() == null) {
                throw new DeepIndexNotFoundException("No index has been found on column " + columnMetadata.getName()
                        + " on table " + this.catalog + "." + this.table);
            }
        }
    }

    /**
     * Checks that the configured read and write consistency levels, when not null, are names of constants of
     * the {@link ConsistencyLevel} enum used by this class.
     *
     * @throws IllegalArgumentException if any of the two levels is not a valid consistency level name.
     */
    private void validateConsistencyLevels() {
        validateConsistencyLevel("readConsistencyLevel", readConsistencyLevel);
        validateConsistencyLevel("writeConsistencyLevel", writeConsistencyLevel);
    }

    /**
     * Checks a single consistency level name; a null level is accepted.
     *
     * @param property the name of the property being checked, used in the error message.
     * @param level    the consistency level name to check.
     * @throws IllegalArgumentException if the level is not a valid consistency level name.
     */
    private void validateConsistencyLevel(String property, String level) {
        if (level == null) {
            return;
        }

        try {
            ConsistencyLevel.valueOf(level);
        } catch (IllegalArgumentException e) {
            // valueOf only reports unknown names this way; rethrow with a message naming the property
            throw new IllegalArgumentException(property + " not valid, "
                    + "should be one of those defined in org.apache.cassandra.db.ConsistencyLevel", e);
        }
    }

    /**
     * Tells whether the given number is a power of two.
     *
     * @param n the number to check.
     * @return true if n is strictly positive and has exactly one bit set, false otherwise.
     */
    private boolean checkIsPowerOfTwo(int n) {
        if (n <= 0) {
            return false;
        }

        return Integer.bitCount(n) == 1;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Stores the value without checking it and returns this object for chaining.
     */

    @Override
    public CassandraDeepJobConfig<T> batchSize(int batchSize) {
        this.batchSize = batchSize;
        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the current flag without triggering initialization.
     */

    @Override
    public Boolean isCreateTableOnWrite() {
        return createTableOnWrite;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Besides the 'createTableOnWrite' flag, this setter also overwrites 'isWriteConfig' with the same value.
     * NOTE(review): as a consequence, passing false makes the validation treat this object as a read
     * configuration - confirm this coupling is intended. A null argument is stored as is and would make
     * later negations of the flag fail.
     */

    @Override
    public CassandraDeepJobConfig<T> createTableOnWrite(Boolean createTableOnWrite) {
        this.createTableOnWrite = createTableOnWrite;
        this.isWriteConfig = createTableOnWrite;

        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The returned map is an unmodifiable view of the internal filter map.
     */
    @Override
    public Map<String, Serializable> getAdditionalFilters() {
        return Collections.unmodifiableMap(additionalFilters);
    }

    /**
     * Returns the configured page size. checkInitialized() runs first, so an uninitialized configuration gets
     * initialized by this call.
     *
     * @return the page size.
     */
    public int getPageSize() {
        checkInitialized();
        return this.pageSize;
    }

    /**
     * Sets the page size. The value is not checked here; validate() rejects values lower than or equal to zero.
     *
     * @param pageSize the page size.
     * @return this object.
     */
    public CassandraDeepJobConfig<T> pageSize(int pageSize) {
        this.pageSize = pageSize;
        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the level name as stored, without triggering initialization.
     */

    @Override
    public String getReadConsistencyLevel() {
        return readConsistencyLevel;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the level name as stored, without triggering initialization.
     */

    @Override
    public String getWriteConsistencyLevel() {
        return writeConsistencyLevel;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The name is stored unchecked; validate() later verifies it is a valid consistency level name.
     */

    @Override
    public CassandraDeepJobConfig<T> readConsistencyLevel(String level) {
        this.readConsistencyLevel = level;

        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The name is stored unchecked; validate() later verifies it is a valid consistency level name.
     */

    @Override
    public CassandraDeepJobConfig<T> writeConsistencyLevel(String level) {
        this.writeConsistencyLevel = level;
        return this;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the stored batch size without triggering initialization.
     */

    @Override
    public int getBatchSize() {
        return batchSize;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the 'isWriteConfig' flag, which createTableOnWrite(Boolean) overwrites with its argument.
     */

    @Override
    public Boolean getIsWriteConfig() {
        return isWriteConfig;
    }

    // Returns the stored bisect factor without triggering initialization.
    @Override
    public int getBisectFactor() {
        return bisectFactor;
    }

    // TODO: It will be added in a future release
    // Currently a no-op: the argument is ignored, the split size keeps its default value and the
    // split/bisect mode flags are left untouched. Only 'this' is returned, for chaining.
    @Override
    public CassandraDeepJobConfig<T> splitSize(int splitSize) {
        // this.isSplitModeSet = true;
        // this.isBisectModeSet = false;
        // this.splitSize = splitSize;
        return this;
    }

    // Returns the split size, which is final and always holds Constants.DEFAULT_SPLIT_SIZE.
    @Override
    public Integer getSplitSize() {
        return this.splitSize;
    }

    // Tells whether the split mode is selected; no method of this class ever sets the flag to true.
    @Override
    public boolean isSplitModeSet() {
        return this.isSplitModeSet;
    }

    // Tells whether the bisect mode is selected; it is by default and after any call to bisectFactor(int).
    @Override
    public boolean isBisectModeSet() {
        return this.isBisectModeSet;
    }

    /**
     * Returns the "equals-in" filter value.
     *
     * @return the stored value, null when none has been set.
     */
    public EqualsInValue getEqualsInValue() {
        return equalsInValue;
    }

    /**
     * Sets the "equals-in" filter value. Unlike the other setters of this class it returns nothing, so it
     * cannot be chained.
     *
     * @param equalsInValue the value to store, may be null.
     */
    public void setEqualsInValue(EqualsInValue equalsInValue) {
        this.equalsInValue = equalsInValue;
    }

    /**
     * Native protocol version handed to the cluster builder by getSession(). The field is public, static and
     * not final, so code outside this class can reassign it; a new value only affects sessions opened
     * afterwards.
     */
    public static ProtocolVersion PROTOCOL_VERSION = ProtocolVersion.V2;

}