org.apache.rya.indexing.accumulo.ConfigUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.rya.indexing.accumulo.ConfigUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.rya.indexing.accumulo;

import static java.util.Objects.requireNonNull;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.log4j.Logger;
import org.apache.rya.accumulo.AccumuloRdfConfiguration;
import org.apache.rya.accumulo.utils.ConnectorFactory;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
import org.apache.rya.api.instance.RyaDetails;
import org.apache.rya.indexing.FilterFunctionOptimizer;
import org.apache.rya.indexing.accumulo.entity.EntityCentricIndex;
import org.apache.rya.indexing.accumulo.entity.EntityOptimizer;
import org.apache.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer;
import org.apache.rya.indexing.accumulo.freetext.LuceneTokenizer;
import org.apache.rya.indexing.accumulo.freetext.Tokenizer;
import org.apache.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer;
import org.apache.rya.indexing.entity.EntityIndexOptimizer;
import org.apache.rya.indexing.entity.update.mongo.MongoEntityIndexer;
import org.apache.rya.indexing.external.PrecomputedJoinIndexer;
import org.apache.rya.indexing.mongodb.freetext.MongoFreeTextIndexer;
import org.apache.rya.indexing.mongodb.temporal.MongoTemporalIndexer;
import org.apache.rya.indexing.pcj.matching.PCJOptimizer;
import org.apache.rya.indexing.statement.metadata.matching.StatementMetadataOptimizer;
import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;

import com.google.common.base.Optional;
import com.google.common.collect.Lists;

/**
 * A set of configuration utils to read a Hadoop {@link Configuration} object and create Cloudbase/Accumulo objects.
 * Soon will deprecate this class.  Use installer for the set methods, use {@link RyaDetails} for the get methods.
 * New code must separate parameters that are set at Rya install time from that which is specific to the client.
 * Also Accumulo index tables are pushed down to the implementation and not configured in conf.
 */
public class ConfigUtils {
    private static final Logger logger = Logger.getLogger(ConfigUtils.class);

    /**
     * @Deprecated use {@link RdfCloudTripleStoreConfiguration#CONF_TBL_PREFIX} instead.
     */
    @Deprecated
    public static final String CLOUDBASE_TBL_PREFIX = RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX;

    /**
     * @Deprecated use {@link AccumuloRdfConfiguration#CLOUDBASE_INSTANCE} instead.
     */
    @Deprecated
    public static final String CLOUDBASE_INSTANCE = AccumuloRdfConfiguration.CLOUDBASE_INSTANCE;

    /**
     * @Deprecated use {@link AccumuloRdfConfiguration#CLOUDBASE_ZOOKEEPERS} instead.
     */
    @Deprecated
    public static final String CLOUDBASE_ZOOKEEPERS = AccumuloRdfConfiguration.CLOUDBASE_ZOOKEEPERS;

    /**
     * @Deprecated use {@link AccumuloRdfConfiguration#CLOUDBASE_USER} instead.
     */
    @Deprecated
    public static final String CLOUDBASE_USER = AccumuloRdfConfiguration.CLOUDBASE_USER;

    /**
     * @Deprecated use {@link AccumuloRdfConfiguration#CLOUDBASE_PASSWORD} instead.
     */
    @Deprecated
    public static final String CLOUDBASE_PASSWORD = AccumuloRdfConfiguration.CLOUDBASE_PASSWORD;
    /**
     * @Deprecated use {@link RdfCloudTripleStoreConfiguration#CONF_QUERY_AUTH} instead.
     */
    @Deprecated
    public static final String CLOUDBASE_AUTHS = RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH;

    public static final String CLOUDBASE_WRITER_MAX_WRITE_THREADS = "sc.cloudbase.writer.maxwritethreads";
    public static final String CLOUDBASE_WRITER_MAX_LATENCY = "sc.cloudbase.writer.maxlatency";
    public static final String CLOUDBASE_WRITER_MAX_MEMORY = "sc.cloudbase.writer.maxmemory";

    public static final String FREE_TEXT_QUERY_TERM_LIMIT = "sc.freetext.querytermlimit";

    public static final String USE_FREETEXT = "sc.use_freetext";
    public static final String USE_TEMPORAL = "sc.use_temporal";
    public static final String USE_ENTITY = "sc.use_entity";
    public static final String USE_PCJ = "sc.use_pcj";
    public static final String USE_OPTIMAL_PCJ = "sc.use.optimal.pcj";
    public static final String USE_PCJ_UPDATER_INDEX = "sc.use.updater";

    public static final String FLUO_APP_NAME = "rya.indexing.pcj.fluo.fluoAppName";
    public static final String USE_PCJ_FLUO_UPDATER = "rya.indexing.pcj.updater.fluo";
    public static final String PCJ_STORAGE_TYPE = "rya.indexing.pcj.storageType";
    public static final String PCJ_UPDATER_TYPE = "rya.indexing.pcj.updaterType";

    public static final String USE_MOCK_INSTANCE = AccumuloRdfConfiguration.USE_MOCK_INSTANCE;

    public static final String NUM_PARTITIONS = "sc.cloudbase.numPartitions";

    private static final int WRITER_MAX_WRITE_THREADS = 1;
    private static final long WRITER_MAX_LATNECY = Long.MAX_VALUE;
    private static final long WRITER_MAX_MEMORY = 10000L;

    public static final String DISPLAY_QUERY_PLAN = "query.printqueryplan";

    public static final String FREETEXT_PREDICATES_LIST = "sc.freetext.predicates";
    public static final String FREETEXT_DOC_NUM_PARTITIONS = "sc.freetext.numPartitions.text";
    public static final String FREETEXT_TERM_NUM_PARTITIONS = "sc.freetext.numPartitions.term";

    public static final String TOKENIZER_CLASS = "sc.freetext.tokenizer.class";

    public static final String GEO_PREDICATES_LIST = "sc.geo.predicates";

    public static final String TEMPORAL_PREDICATES_LIST = "sc.temporal.predicates";

    public static final String USE_MONGO = "sc.useMongo";

    public static boolean isDisplayQueryPlan(final Configuration conf) {
        return conf.getBoolean(DISPLAY_QUERY_PLAN, false);
    }

    /**
     * get a value from the configuration file and throw an exception if the
     * value does not exist.
     *
     * @param conf
     * @param key
     * @return
     */
    private static String getStringCheckSet(final Configuration conf, final String key) {
        final String value = conf.get(key);
        requireNonNull(value, key + " not set");
        return value;
    }

    /**
     * @param conf
     * @param tablename
     * @return if the table was created
     * @throws AccumuloException
     * @throws AccumuloSecurityException
     * @throws TableExistsException
     */
    public static boolean createTableIfNotExists(final Configuration conf, final String tablename)
            throws AccumuloException, AccumuloSecurityException, TableExistsException {
        final TableOperations tops = getConnector(conf).tableOperations();
        if (!tops.exists(tablename)) {
            logger.info("Creating table: " + tablename);
            tops.create(tablename);
            return true;
        }
        return false;
    }

    /**
     * Lookup the table name prefix in the conf and throw an error if it is
     * null. Future, get table prefix from RyaDetails -- the Rya instance name
     * -- also getting info from the RyaDetails should happen within
     * RyaSailFactory and not ConfigUtils.
     * 
     * @param conf
     *            Rya configuration map where it extracts the prefix (instance
     *            name)
     * @return index table prefix corresponding to this Rya instance
     */
    public static String getTablePrefix(final Configuration conf) {
        final String tablePrefix;
        tablePrefix = conf.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX);
        requireNonNull(tablePrefix, "Configuration key: " + RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX
                + " not set.  Cannot generate table name.");
        return tablePrefix;
    }

    public static int getFreeTextTermLimit(final Configuration conf) {
        return conf.getInt(FREE_TEXT_QUERY_TERM_LIMIT, 100);
    }

    public static Set<URI> getFreeTextPredicates(final Configuration conf) {
        return getPredicates(conf, FREETEXT_PREDICATES_LIST);
    }

    public static Set<URI> getGeoPredicates(final Configuration conf) {
        return getPredicates(conf, GEO_PREDICATES_LIST);
    }

    /**
     * Used for indexing statements about date & time instances and intervals.
     * 
     * @param conf
     * @return Set of predicate URI's whose objects should be date time
     *         literals.
     */
    public static Set<URI> getTemporalPredicates(final Configuration conf) {
        return getPredicates(conf, TEMPORAL_PREDICATES_LIST);
    }

    protected static Set<URI> getPredicates(final Configuration conf, final String confName) {
        final String[] validPredicateStrings = conf.getStrings(confName, new String[] {});
        final Set<URI> predicates = new HashSet<>();
        for (final String prediateString : validPredicateStrings) {
            predicates.add(new URIImpl(prediateString));
        }
        return predicates;
    }

    public static Tokenizer getFreeTextTokenizer(final Configuration conf) {
        final Class<? extends Tokenizer> c = conf.getClass(TOKENIZER_CLASS, LuceneTokenizer.class, Tokenizer.class);
        return ReflectionUtils.newInstance(c, conf);
    }

    public static BatchWriter createDefaultBatchWriter(final String tablename, final Configuration conf)
            throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
        final Long DEFAULT_MAX_MEMORY = getWriterMaxMemory(conf);
        final Long DEFAULT_MAX_LATENCY = getWriterMaxLatency(conf);
        final Integer DEFAULT_MAX_WRITE_THREADS = getWriterMaxWriteThreads(conf);
        final Connector connector = ConfigUtils.getConnector(conf);
        return connector.createBatchWriter(tablename, DEFAULT_MAX_MEMORY, DEFAULT_MAX_LATENCY,
                DEFAULT_MAX_WRITE_THREADS);
    }

    public static MultiTableBatchWriter createMultitableBatchWriter(final Configuration conf)
            throws AccumuloException, AccumuloSecurityException {
        final Long DEFAULT_MAX_MEMORY = getWriterMaxMemory(conf);
        final Long DEFAULT_MAX_LATENCY = getWriterMaxLatency(conf);
        final Integer DEFAULT_MAX_WRITE_THREADS = getWriterMaxWriteThreads(conf);
        final Connector connector = ConfigUtils.getConnector(conf);
        return connector.createMultiTableBatchWriter(DEFAULT_MAX_MEMORY, DEFAULT_MAX_LATENCY,
                DEFAULT_MAX_WRITE_THREADS);
    }

    public static Scanner createScanner(final String tablename, final Configuration conf)
            throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
        final Connector connector = ConfigUtils.getConnector(conf);
        final Authorizations auths = ConfigUtils.getAuthorizations(conf);
        return connector.createScanner(tablename, auths);

    }

    public static BatchScanner createBatchScanner(final String tablename, final Configuration conf)
            throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
        final Connector connector = ConfigUtils.getConnector(conf);
        final Authorizations auths = ConfigUtils.getAuthorizations(conf);
        Integer numThreads = null;
        if (conf instanceof RdfCloudTripleStoreConfiguration) {
            numThreads = ((RdfCloudTripleStoreConfiguration) conf).getNumThreads();
        } else {
            numThreads = conf.getInt(RdfCloudTripleStoreConfiguration.CONF_NUM_THREADS, 2);
        }
        return connector.createBatchScanner(tablename, auths, numThreads);
    }

    public static int getWriterMaxWriteThreads(final Configuration conf) {
        return conf.getInt(CLOUDBASE_WRITER_MAX_WRITE_THREADS, WRITER_MAX_WRITE_THREADS);
    }

    public static long getWriterMaxLatency(final Configuration conf) {
        return conf.getLong(CLOUDBASE_WRITER_MAX_LATENCY, WRITER_MAX_LATNECY);
    }

    public static long getWriterMaxMemory(final Configuration conf) {
        return conf.getLong(CLOUDBASE_WRITER_MAX_MEMORY, WRITER_MAX_MEMORY);
    }

    public static String getUsername(final JobContext job) {
        return getUsername(job.getConfiguration());
    }

    /**
     * Get the Accumulo username from the configuration object that is meant to
     * be used when connecting a {@link Connector} to Accumulo.
     *
     * @param conf - The configuration object that will be interrogated. (not null)
     * @return The username if one could be found; otherwise {@code null}.
     */
    public static String getUsername(final Configuration conf) {
        return new AccumuloRdfConfiguration(conf).getUsername();
    }

    public static Authorizations getAuthorizations(final JobContext job) {
        return getAuthorizations(job.getConfiguration());
    }

    public static Authorizations getAuthorizations(final Configuration conf) {
        final String authString = conf.get(RdfCloudTripleStoreConfiguration.CONF_QUERY_AUTH, "");
        if (authString.isEmpty()) {
            return new Authorizations();
        }
        return new Authorizations(authString.split(","));
    }

    public static Instance getInstance(final JobContext job) {
        return getInstance(job.getConfiguration());
    }

    /**
     * Create an {@link Instance} that may be used to create {@link Connector}s
     * to Accumulo. If the configuration has the {@link #USE_MOCK_INSTANCE} flag
     * set, then the instance will be be a {@link MockInstance} instead of a
     * Zookeeper backed instance.
     *
     * @param conf - The configuration object that will be interrogated. (not null)
     * @return The {@link Instance} that may be used to connect to Accumulo.
     */
    public static Instance getInstance(final Configuration conf) {
        // Pull out the Accumulo specific configuration values.
        final AccumuloRdfConfiguration accConf = new AccumuloRdfConfiguration(conf);
        String instanceName = accConf.getInstanceName();
        String zoookeepers = accConf.getZookeepers();

        // Create an Instance a mock if the mock flag is set.
        if (useMockInstance(conf)) {
            return new MockInstance(instanceName);
        }

        // Otherwise create an Instance to a Zookeeper managed instance of Accumulo.
        return new ZooKeeperInstance(instanceName, zoookeepers);
    }

    public static String getPassword(final JobContext job) {
        return getPassword(job.getConfiguration());
    }

    /**
     * Get the Accumulo password from the configuration object that is meant to
     * be used when connecting a {@link Connector} to Accumulo.
     *
     * @param conf - The configuration object that will be interrogated. (not null)
     * @return The password if one could be found; otherwise an empty string.
     */
    public static String getPassword(final Configuration conf) {
        return new AccumuloRdfConfiguration(conf).getPassword();
    }

    public static Connector getConnector(final JobContext job) throws AccumuloException, AccumuloSecurityException {
        return getConnector(job.getConfiguration());
    }

    /**
     * Create an Accumulo {@link Connector} using the configured connection information.
     * If the connection information  points to a mock instance of Accumulo, then the
     * {@link #USE_MOCK_INSTANCE} flag must be set.
     *
     * @param conf - Configures how the connector will be built. (not null)
     * @return A {@link Connector} that may be used to interact with the configured Accumulo instance.
     * @throws AccumuloException The connector couldn't be created because of an Accumulo problem.
     * @throws AccumuloSecurityException The connector couldn't be created because of an Accumulo security violation.
     */
    public static Connector getConnector(final Configuration conf)
            throws AccumuloException, AccumuloSecurityException {
        return ConnectorFactory.connect(new AccumuloRdfConfiguration(conf));
    }

    /**
     * Indicates that a Mock instance of Accumulo is being used to back the Rya instance.
     *
     * @param conf - The configuration object that will be interrogated. (not null)
     * @return {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}.
     */
    public static boolean useMockInstance(final Configuration conf) {
        return new AccumuloRdfConfiguration(conf).useMockInstance();
    }

    protected static int getNumPartitions(final Configuration conf) {
        return conf.getInt(NUM_PARTITIONS, 25);
    }

    public static int getFreeTextDocNumPartitions(final Configuration conf) {
        return conf.getInt(FREETEXT_DOC_NUM_PARTITIONS, getNumPartitions(conf));
    }

    public static int getFreeTextTermNumPartitions(final Configuration conf) {
        return conf.getInt(FREETEXT_TERM_NUM_PARTITIONS, getNumPartitions(conf));
    }

    public static boolean getUseFreeText(final Configuration conf) {
        return conf.getBoolean(USE_FREETEXT, false);
    }

    public static boolean getUseTemporal(final Configuration conf) {
        return conf.getBoolean(USE_TEMPORAL, false);
    }

    public static boolean getUseEntity(final Configuration conf) {
        return conf.getBoolean(USE_ENTITY, false);
    }

    public static boolean getUsePCJ(final Configuration conf) {
        return conf.getBoolean(USE_PCJ, false);
    }

    public static boolean getUseOptimalPCJ(final Configuration conf) {
        return conf.getBoolean(USE_OPTIMAL_PCJ, false);
    }

    public static boolean getUsePcjUpdaterIndex(final Configuration conf) {
        return conf.getBoolean(USE_PCJ_UPDATER_INDEX, false);
    }

    /**
     * @return The name of the Fluo Application this instance of RYA is using to
     *         incrementally update PCJs.
     */
    // TODO delete this eventually and use Details table
    public Optional<String> getFluoAppName(final Configuration conf) {
        return Optional.fromNullable(conf.get(FLUO_APP_NAME));
    }

    public static boolean getUseMongo(final Configuration conf) {
        return conf.getBoolean(USE_MONGO, false);
    }

    public static void setIndexers(final RdfCloudTripleStoreConfiguration conf) {

        final List<String> indexList = Lists.newArrayList();
        final List<String> optimizers = Lists.newArrayList();

        boolean useFilterIndex = false;

        if (ConfigUtils.getUseMongo(conf)) {
            if (getUseFreeText(conf)) {
                indexList.add(MongoFreeTextIndexer.class.getName());
                useFilterIndex = true;
            }

            if (getUseEntity(conf)) {
                indexList.add(MongoEntityIndexer.class.getName());
                optimizers.add(EntityIndexOptimizer.class.getName());
            }

            if (getUseTemporal(conf)) {
                indexList.add(MongoTemporalIndexer.class.getName());
                useFilterIndex = true;
            }
        } else {
            if (getUsePCJ(conf) || getUseOptimalPCJ(conf)) {
                conf.setPcjOptimizer(PCJOptimizer.class);
            }

            if (getUsePcjUpdaterIndex(conf)) {
                indexList.add(PrecomputedJoinIndexer.class.getName());
            }

            if (getUseFreeText(conf)) {
                indexList.add(AccumuloFreeTextIndexer.class.getName());
                useFilterIndex = true;
            }

            if (getUseTemporal(conf)) {
                indexList.add(AccumuloTemporalIndexer.class.getName());
                useFilterIndex = true;
            }

            if (getUseEntity(conf)) {
                indexList.add(EntityCentricIndex.class.getName());
                optimizers.add(EntityOptimizer.class.getName());
            }
        }

        if (useFilterIndex) {
            optimizers.add(FilterFunctionOptimizer.class.getName());
        }

        if (conf.getUseStatementMetadata()) {
            optimizers.add(StatementMetadataOptimizer.class.getName());
        }

        conf.setStrings(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS, indexList.toArray(new String[] {}));
        conf.setStrings(RdfCloudTripleStoreConfiguration.CONF_OPTIMIZERS, optimizers.toArray(new String[] {}));
    }
}