// Java tutorial
/* * Copyright 2009-2016 DigitalGlobe, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * */ package org.mrgeo.data; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.mrgeo.core.MrGeoProperties; import org.mrgeo.data.adhoc.AdHocDataProvider; import org.mrgeo.data.adhoc.AdHocDataProviderFactory; import org.mrgeo.data.image.MrsImageDataProvider; import org.mrgeo.data.image.MrsImageDataProviderFactory; import org.mrgeo.data.vector.VectorDataProvider; import org.mrgeo.data.vector.VectorDataProviderFactory; import org.mrgeo.utils.DependencyLoader; import org.mrgeo.utils.HadoopUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.*; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; /** * This class is responsible for creating instances of several types of data providers. * A data provider is an abstraction that provides read/write access to data in a * back-end data store without knowing the actual type of the back-end data store * (e.g. HDFS, Accumulo, ...). 
There is an abstract data provider class for each * type of data to be accessed, including AdHoc data, and MrsImage data. * These are described below. Each instance of a data provider returned by the methods * in this class are for a specific piece of data (like a java.io.File instance references * a specific file on the disk). Data providers are implemented in data plugins * and the providers are discovered dynamically at runtime through the Java * ServiceLoader. * * MrsImage data providers access a MrGeo image (referred to as a MrsImage). A MrsImage * data provider is for writing to a MrsImage. * * AdHoc data providers are used for accessing other types of data besides raw * images and MrsImages. This data provider is used within MrGeo while performing * processing in order to save state as it works. * * Vector data providers access a vector data source. */ public class DataProviderFactory { static Logger log = LoggerFactory.getLogger(DataProviderFactory.class); public enum AccessMode { READ, WRITE, OVERWRITE } final static String PREFERRED_PROPERTYNAME = "preferred.provider"; final static String PREFERRED_ADHOC_PROPERTYNAME = "preferred.adhoc.provider"; final static String PREFERRED_MRSIMAGE_PROPERTYNAME = "preferred.image.provider"; final static String PREFERRED_VECTOR_PROPERTYNAME = "preferred.vector.provider"; final static String BASECLASS = DataProviderFactory.class.getSimpleName() + "."; final static String PREFERRED_ADHOC_PROVIDER_NAME = BASECLASS + PREFERRED_ADHOC_PROPERTYNAME; final static String PREFERRED_MRSIMAGE_PROVIDER_NAME = BASECLASS + PREFERRED_MRSIMAGE_PROPERTYNAME; final static String PREFERRED_VECTOR_PROVIDER_NAME = BASECLASS + PREFERRED_VECTOR_PROPERTYNAME; final static String DATA_PROVIDER_CONFIG_PREFIX = BASECLASS + "config."; private final static String PREFIX_CHAR = ":"; // use ":" for the prefix delimiter private final static int PROVIDER_CACHE_SIZE = 50; private final static int PROVIDER_CACHE_EXPIRE = 10; // minutes private static 
Configuration basicConf; private static Map<String, String> configSettings; /** * The key for the caller's user name that MrGeo will include in provider properties * objects passed to methods in this API. If the MrGeo installation is not secured, * then this property will not be included in provider properties, and provider * properties may be null. */ public static final String PROVIDER_PROPERTY_USER_NAME = "mrgeo.security.user.name"; /** * The key for the caller's security roles (comma-delimited) that MrGeo will include * in provider properties objects passed to methods in this API. If the MrGeo * installation is not secured, then this property will not be included in provider * properties, and provider properties may be null. */ public static final String PROVIDER_PROPERTY_USER_ROLES = "mrgeo.security.user.roles"; public static void saveProviderPropertiesToConfig(final ProviderProperties providerProperties, final Configuration conf) { log.debug("Saving provider properties to config"); if (providerProperties != null) { conf.set(PROVIDER_PROPERTY_USER_NAME, providerProperties.getUserName()); conf.set(PROVIDER_PROPERTY_USER_ROLES, StringUtils.join(providerProperties.getRoles(), ",")); } // Also, we want to save the configuration settings for each data provider // in the Configuration as well so they can be re-instantiated on the remote // side of a map/reduce 1 job. Map<String, String> configSettings = getConfigurationFromProviders(); log.debug("Saving " + configSettings.size() + " configuration settings from data providers to config"); Set<String> keys = configSettings.keySet(); for (String key : keys) { conf.set(DATA_PROVIDER_CONFIG_PREFIX + key, configSettings.get(key)); } } public static ProviderProperties loadProviderPropertiesFromConfig(Configuration conf) { // Tell each data provider to load their config settings from the Configuration. // This is the inverse operation to saveProviderPropertiesToConfig. 
Iterator<Map.Entry<String, String>> iter = conf.iterator(); Map<String, String> configSettings = new HashMap<String, String>(); int prefixLen = DATA_PROVIDER_CONFIG_PREFIX.length(); while (iter.hasNext()) { Map.Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(DATA_PROVIDER_CONFIG_PREFIX)) { configSettings.put(entry.getKey().substring(prefixLen), entry.getValue()); } } setConfigurationForProviders(configSettings); String userName = conf.get(PROVIDER_PROPERTY_USER_NAME, ""); List<String> roles = new ArrayList<String>(); String strRoles = conf.get(PROVIDER_PROPERTY_USER_ROLES, ""); if (strRoles != null && !strRoles.isEmpty()) { String[] separated = strRoles.split(","); for (String r : separated) { roles.add(r); } } return new ProviderProperties(userName, roles); } public static Map<String, String> getConfigurationFromProviders() { Map<String, String> result = new HashMap<String, String>(); try { initialize(getBasicConfig()); } catch (DataProviderException e) { log.error("Unable to initialize data providers", e); return result; } if (adHocProviderFactories != null) { for (final AdHocDataProviderFactory dpf : adHocProviderFactories.values()) { Map<String, String> p = dpf.getConfiguration(); if (p != null) { log.debug("Got " + p.size() + " config settings from " + dpf.getClass().getName()); result.putAll(p); } else { log.debug("Got no config settings from " + dpf.getClass().getName()); } } } if (mrsImageProviderFactories != null) { for (final MrsImageDataProviderFactory dpf : mrsImageProviderFactories.values()) { Map<String, String> p = dpf.getConfiguration(); if (p != null) { log.debug("Got " + p.size() + " config settings from " + dpf.getClass().getName()); result.putAll(p); } else { log.debug("Got no config settings from " + dpf.getClass().getName()); } } } if (vectorProviderFactories != null) { for (final VectorDataProviderFactory dpf : vectorProviderFactories.values()) { Map<String, String> p = dpf.getConfiguration(); if (p != null) { 
log.debug("Got " + p.size() + " config settings from " + dpf.getClass().getName()); result.putAll(p); } else { log.debug("Got no config settings from " + dpf.getClass().getName()); } } } return result; } public static void setConfigurationForProviders(Map<String, String> properties) { if (log.isInfoEnabled()) { if (properties != null) { log.debug("Config settings passed to all data providers has size " + properties.size()); } else { log.debug("Config settings passed to all data providers is empty"); } } configSettings = properties; } private static class AdHocLoader implements Callable<AdHocDataProvider> { private String prefix; private String name; private AccessMode accessMode; private Configuration conf; private ProviderProperties props; public AdHocLoader(final String name, final AccessMode accessMode, final Configuration conf, final ProviderProperties props) { this.conf = conf; this.props = props; this.prefix = getPrefix(name); if (prefix != null) { this.name = name.substring(this.prefix.length() + PREFIX_CHAR.length()); } else { this.name = name; } this.accessMode = accessMode; } @Override public AdHocDataProvider call() throws Exception { initialize(conf); final AdHocDataProviderFactory factory = findFactory(); if (accessMode == AccessMode.READ) { if (factory != null) { if (factory.canOpen(name, props)) { return factory.createAdHocDataProvider(name, props); } else { log.info("Could not open " + name + " using factory " + factory.getClass().getName()); } } throw new DataProviderNotFound("Unable to find an ad hoc data provider for " + name); } else if (accessMode == AccessMode.OVERWRITE) { if (factory != null) { if (factory.exists(name, props)) { factory.delete(name, props); } return factory.createAdHocDataProvider(name, props); } return getPreferredProvider().createAdHocDataProvider(name, props); } else { if (factory != null) { if (factory.canWrite(name, props)) { return factory.createAdHocDataProvider(name, props); } throw new DataProviderNotFound("Unable to 
find an ad hoc data provider for " + name); } return getPreferredProvider().createAdHocDataProvider(name, props); } } private AdHocDataProviderFactory getPreferredProvider() throws DataProviderNotFound { if (adHocProviderFactories.containsKey(preferredAdHocProviderName)) { return adHocProviderFactories.get(preferredAdHocProviderName); } throw new DataProviderNotFound("No ad hoc data providers found "); } private AdHocDataProviderFactory findFactory() throws IOException { if (prefix != null) { if (adHocProviderFactories.containsKey(prefix)) { return adHocProviderFactories.get(prefix); } } for (final AdHocDataProviderFactory factory : adHocProviderFactories.values()) { if (factory.exists(name, props)) { return factory; } log.debug("resource cache load: " + name); } return null; } } private static class MrsImageLoader implements Callable<MrsImageDataProvider> { private String prefix; private String name; private AccessMode accessMode; private Configuration conf; private ProviderProperties props; public MrsImageLoader(final String name, final AccessMode accessMode, final Configuration conf, final ProviderProperties props) { this.conf = conf; this.props = props; this.prefix = getPrefix(name); if (prefix != null) { this.name = name.substring(this.prefix.length() + PREFIX_CHAR.length()); } else { this.name = name; } this.accessMode = accessMode; } @Override public MrsImageDataProvider call() throws Exception { initialize(conf); final MrsImageDataProviderFactory factory = findFactory(); if (accessMode == AccessMode.READ) { if (factory != null) { if (factory.canOpen(name, props)) { return factory.createMrsImageDataProvider(name, props); } else { log.warn("Could not open " + name + " using factory " + factory.getClass().getName()); } } throw new DataProviderNotFound("Unable to find a MrsImage data provider for " + name); } else if (accessMode == AccessMode.OVERWRITE) { if (factory != null) { if (factory.exists(name, props)) { factory.delete(name, props); } return 
factory.createMrsImageDataProvider(name, props); } return getPreferredProvider().createMrsImageDataProvider(name, props); } else { if (factory != null) { if (factory.canWrite(name, props)) { return factory.createMrsImageDataProvider(name, props); } throw new DataProviderNotFound("Unable to find a MrsImage data provider for " + name); } return getPreferredProvider().createMrsImageDataProvider(name, props); } } private MrsImageDataProviderFactory getPreferredProvider() throws DataProviderNotFound { if (mrsImageProviderFactories.containsKey(preferredImageProviderName)) { return mrsImageProviderFactories.get(preferredImageProviderName); } throw new DataProviderNotFound("No MrsImage data providers found "); } private MrsImageDataProviderFactory findFactory() throws IOException { if (prefix != null) { if (mrsImageProviderFactories.containsKey(prefix)) { if (log.isDebugEnabled()) { log.debug("returning " + mrsImageProviderFactories.get(prefix).getClass().getName()); } return mrsImageProviderFactories.get(prefix); } else { if (log.isInfoEnabled()) { log.info("No image data provider matches prefix " + prefix); } } } for (final MrsImageDataProviderFactory factory : mrsImageProviderFactories.values()) { if (factory.exists(name, props)) { if (log.isDebugEnabled()) { log.debug("Returning provider " + factory.getClass().getName() + " for image " + name); } return factory; } else { if (log.isInfoEnabled()) { log.info("Image " + name + " does not exist for provider " + factory.getClass().getName()); } } } return null; } } private static class VectorLoader implements Callable<VectorDataProvider> { private String name; private String prefix; private AccessMode accessMode; private Configuration conf; private ProviderProperties props; public VectorLoader(final String name, final AccessMode accessMode, final Configuration conf, final ProviderProperties props) { this.conf = conf; if (conf == null && props == null) { this.props = new ProviderProperties(); } else { this.props = props; } 
this.prefix = getPrefix(name); if (prefix != null) { this.name = name.substring(this.prefix.length() + PREFIX_CHAR.length()); } else { this.name = name; } this.accessMode = accessMode; } @Override public VectorDataProvider call() throws Exception { initialize(conf); final VectorDataProviderFactory factory = findFactory(); if (accessMode == AccessMode.READ) { if (factory != null) { if (log.isDebugEnabled()) { log.debug("For vector " + name + ", found factory: " + factory.getClass().getName()); } if (factory.canOpen(name, props)) { if (log.isDebugEnabled()) { log.debug( "Factory " + factory.getClass().getName() + " is able to open vector " + name); } return factory.createVectorDataProvider(prefix, name, props); } else { if (log.isInfoEnabled()) { log.info("Unable to open vector " + name + " with data provider " + factory.getClass().getName()); } } } else { log.info("Unable to find a data provider to use for vector " + name); } // Log some useful debug information String msg = "Unable to find a vector data provider for " + name + " using prefix " + prefix; if (log.isDebugEnabled()) { log.debug(msg); log.debug("Available vector provider factories: " + vectorProviderFactories.size()); for (VectorDataProviderFactory f : vectorProviderFactories.values()) { log.debug(f.getPrefix() + " using " + f.getClass().getName()); } String cp = System.getProperty("java.class.path"); log.debug("java.class.path=" + cp); } throw new DataProviderNotFound(msg); } else if (accessMode == AccessMode.OVERWRITE) { if (factory != null) { if (factory.exists(name, props)) { factory.delete(name, props); } return factory.createVectorDataProvider(prefix, name, props); } return getPreferredProvider().createVectorDataProvider(prefix, name, props); } else { if (factory != null) { if (factory.canWrite(name, props)) { return factory.createVectorDataProvider(prefix, name, props); } String msg = "Unable to find a vector data provider for " + name + " using prefix " + prefix; if (log.isDebugEnabled()) { 
log.debug(msg); log.debug("Available vector provider factories: " + vectorProviderFactories.size()); for (VectorDataProviderFactory f : vectorProviderFactories.values()) { log.debug(f.getPrefix() + " using " + f.getClass().getName()); } String cp = System.getProperty("java.class.path"); log.debug("java.class.path=" + cp); } throw new DataProviderNotFound(msg); } return getPreferredProvider().createVectorDataProvider(prefix, name, props); } } private VectorDataProviderFactory getPreferredProvider() throws DataProviderNotFound { if (vectorProviderFactories.containsKey(preferredVectorProviderName)) { return vectorProviderFactories.get(preferredVectorProviderName); } throw new DataProviderNotFound("No vector data providers found "); } private VectorDataProviderFactory findFactory() throws IOException { boolean debugEnabled = log.isDebugEnabled(); if (debugEnabled) { log.debug("Looking for factory for prefix: " + ((prefix != null) ? prefix : "null") + " and name " + name); log.debug("Vector factory count = " + vectorProviderFactories.size()); } if (prefix != null) { if (vectorProviderFactories.containsKey(prefix)) { if (debugEnabled) { log.debug("Returning factory from prefix cache: " + vectorProviderFactories.get(prefix).getClass().getName()); } return vectorProviderFactories.get(prefix); } } for (final VectorDataProviderFactory factory : vectorProviderFactories.values()) { if (debugEnabled) { log.debug("Checking factory: " + factory.getClass().getName()); } if (factory.exists(name, props)) { if (debugEnabled) { log.debug("Returning factory from provider properties: " + factory.getClass().getName()); } return factory; } if (debugEnabled) { log.debug("resource cache load: " + name); } } if (debugEnabled) { log.debug("Returning null factory"); } return null; } } private static Cache<String, AdHocDataProvider> adHocProviderCache = CacheBuilder.newBuilder() .maximumSize(PROVIDER_CACHE_SIZE).expireAfterAccess(PROVIDER_CACHE_EXPIRE, TimeUnit.MINUTES) .removalListener(new 
RemovalListener<String, AdHocDataProvider>() { @Override public void onRemoval(final RemovalNotification<String, AdHocDataProvider> notification) { log.debug("resource cache removal: " + notification.getKey()); } }).build(); private static Cache<String, MrsImageDataProvider> mrsImageProviderCache = CacheBuilder.newBuilder() .maximumSize(PROVIDER_CACHE_SIZE).expireAfterAccess(PROVIDER_CACHE_EXPIRE, TimeUnit.MINUTES) .removalListener(new RemovalListener<String, MrsImageDataProvider>() { @Override public void onRemoval(final RemovalNotification<String, MrsImageDataProvider> notification) { log.debug("resource cache removal: " + notification.getKey()); } }).build(); private static Cache<String, VectorDataProvider> vectorProviderCache = CacheBuilder.newBuilder() .maximumSize(PROVIDER_CACHE_SIZE).expireAfterAccess(PROVIDER_CACHE_EXPIRE, TimeUnit.MINUTES) .removalListener(new RemovalListener<String, VectorDataProvider>() { @Override public void onRemoval(final RemovalNotification<String, VectorDataProvider> notification) { log.debug("resource cache removal: " + notification.getKey()); } }).build(); protected static Map<String, AdHocDataProviderFactory> adHocProviderFactories; protected static Map<String, MrsImageDataProviderFactory> mrsImageProviderFactories; protected static Map<String, VectorDataProviderFactory> vectorProviderFactories; protected static String preferredAdHocProviderName = null; protected static String preferredImageProviderName = null; protected static String preferredVectorProviderName = null; // public static TileDataProvider getDataProvider(final String name) throws DataProviderNotFound // { // initialize(); // for (TileDataProvider dp : mrsImageProviders) // { // if (dp.canOpen(name)) // { // return dp; // } // } // throw new DataProviderNotFound("Unable to find a data provider for " + name); // } /** * Create a data provider for a new ad hoc data source with a randomly generated * name. 
Use this method if you don't care what the name of the data source is. * This is useful for storing temporary data that will be accessed during * processing, and then deleted after processing completes. This method should * be called from the name node side, not from mappers or reducers. * * TODO: Add information about how it chooses the appropriate provider for the * name passed in. * * TODO: When ad hoc data is implemented in Accumulo, this method will need to * be changed so that it receives the providerProperties argument just like * the getMrsImageDataProvider method. * * @return An ad hoc data provider for a newly created, randomly named resource. * @throws DataProviderNotFound */ public static AdHocDataProvider createAdHocDataProvider(final ProviderProperties providerProperties) throws DataProviderNotFound, DataProviderException { return createAdHocDataProvider(getBasicConfig(), providerProperties); } /** * Create a data provider for a new ad hoc data source with a randomly generated * name. Use this method if you don't care what the name of the data source is. * This is useful for storing temporary data that will be accessed during * processing, and then deleted after processing completes. This method should * be called from the data node side (inside mappers and reducers). * * TODO: Add information about how it chooses the appropriate provider for the * name passed in. * * @return An ad hoc data provider for a newly created, randomly named resource. 
* @throws DataProviderNotFound if no ad hoc data provider factory is registered
 * @throws DataProviderException if the provider could not be created
 */
public static AdHocDataProvider createAdHocDataProvider(final Configuration conf)
    throws DataProviderNotFound, DataProviderException {
  return createAdHocDataProvider(conf, loadProviderPropertiesFromConfig(conf));
}

/**
 * Creates a randomly named ad hoc resource with the first registered ad hoc data
 * provider factory and caches the resulting provider.
 *
 * @param conf  the configuration used to initialize the provider factories
 * @param props the caller's provider properties; may be null
 * @return the provider for the newly created resource
 * @throws DataProviderNotFound if no ad hoc data provider factory is registered
 * @throws DataProviderException if the factory fails to create the provider
 */
private static AdHocDataProvider createAdHocDataProvider(final Configuration conf,
    final ProviderProperties props) throws DataProviderNotFound, DataProviderException {
  initialize(conf);
  // Only the first factory in iteration order is ever used.
  final Iterator<AdHocDataProviderFactory> factories =
      adHocProviderFactories.values().iterator();
  if (!factories.hasNext()) {
    throw new DataProviderNotFound(
        "Unable to find an ad hoc data provider for a new randomly named resource");
  }
  final AdHocDataProvider provider;
  try {
    provider = factories.next().createAdHocDataProvider(props);
  } catch (IOException e) {
    throw new DataProviderException("Can not create ad hoc data provider", e);
  }
  // Cache under the same per-user key that getAdHocDataProvider() looks up, so a
  // provider created with one user's properties is never served to another caller.
  adHocProviderCache.put(getResourceCacheKey(provider.getResourceName(), conf, props), provider);
  return provider;
}

/**
 * Create a data provider for a specifically named ad hoc data source. Use this
 * method if you need an ad hoc data source with a name that you assign. This
 * would be used for accessing/storing named data that is not raw imagery being
 * ingested or a MrsImage.
 *
 * TODO: Add information about how it chooses the appropriate provider for the
 * name passed in.
 *
 * TODO: When ad hoc data is implemented in Accumulo, this method will need to
 * be changed so that it receives the providerProperties argument just like
 * the getMrsImageDataProvider method.
* * @param name * @return * @throws DataProviderNotFound */ public static AdHocDataProvider getAdHocDataProvider(final String name, final AccessMode mode, final ProviderProperties providerProperties) throws DataProviderNotFound { return getAdHocDataProvider(name, mode, getBasicConfig(), providerProperties); } public static AdHocDataProvider getAdHocDataProvider(final String name, final AccessMode mode, final Configuration conf) throws DataProviderNotFound { return getAdHocDataProvider(name, mode, conf, loadProviderPropertiesFromConfig(conf)); } private static AdHocDataProvider getAdHocDataProvider(final String name, final AccessMode mode, final Configuration conf, final ProviderProperties props) throws DataProviderNotFound { try { // Make sure that ad hoc resources are cached uniquely per user String cacheKey = getResourceCacheKey(name, conf, props); if (mode == AccessMode.OVERWRITE || mode == AccessMode.WRITE) { invalidateCache(cacheKey); } return adHocProviderCache.get(cacheKey, new AdHocLoader(name, mode, conf, props)); } catch (ExecutionException e) { if (e.getCause() instanceof DataProviderNotFound) { throw (DataProviderNotFound) e.getCause(); } throw new DataProviderNotFound(e); } } /** * Returns a list of MrsImages available from all data sources. The names returned * can be subsequently passed as the name parameter to getMrsImageDataProvider(). * * @return * @throws IOException */ public static String[] listImages(final ProviderProperties providerProperties) throws IOException { initialize(getBasicConfig()); List<String> results = new ArrayList<String>(); for (final MrsImageDataProviderFactory factory : mrsImageProviderFactories.values()) { String[] images = factory.listImages(providerProperties); if (images != null && images.length > 0) { results.addAll(Arrays.asList(images)); } } String[] returnValue = new String[results.size()]; return results.toArray(returnValue); } /** * Returns a list of vectors available from all data sources. 
The names returned * can be subsequently passed as the name parameter to getVectorDataProvider(). * * @return * @throws IOException */ public static String[] listVectors(final ProviderProperties providerProperties) throws IOException { initialize(getBasicConfig()); List<String> results = new ArrayList<String>(); for (final VectorDataProviderFactory factory : vectorProviderFactories.values()) { String[] vectors = factory.listVectors(providerProperties); if (vectors != null && vectors.length > 0) { results.addAll(Arrays.asList(vectors)); } } String[] returnValue = new String[results.size()]; return results.toArray(returnValue); } // Be sure that provider caching is unique for different users so that we're // not using the wrong user credentials. On the server side, we include the // calling user name in the key. On the data node side, there is no need to // do this because the cache only contains providers used for one job, which // is executed for one user. private static String getResourceCacheKey(final String resourceName, final Configuration conf, final ProviderProperties providerProperties) { if (providerProperties != null) { String userName = providerProperties.getUserName(); if (userName != null && !userName.isEmpty()) { return resourceName + "," + userName; } } return resourceName; } public static MrsImageDataProvider createTempMrsImageDataProvider(ProviderProperties props) throws DataProviderNotFound, DataProviderException { return createTempMrsImageDataProvider(getBasicConfig(), props); } public static MrsImageDataProvider createTempMrsImageDataProvider(Configuration conf) throws DataProviderNotFound, DataProviderException { return createTempMrsImageDataProvider(conf, loadProviderPropertiesFromConfig(conf)); } private static MrsImageDataProvider createTempMrsImageDataProvider(final Configuration conf, final ProviderProperties providerProperties) throws DataProviderNotFound, DataProviderException { initialize(conf); for (final MrsImageDataProviderFactory 
factory : mrsImageProviderFactories.values()) { MrsImageDataProvider provider; try { provider = factory.createTempMrsImageDataProvider(providerProperties); } catch (IOException e) { throw new DataProviderException("Can not create temporary mrs image data provider", e); } mrsImageProviderCache.put(provider.getResourceName(), provider); return provider; } throw new DataProviderException("Can not create temporary mrs image data provider"); } /** * Create a data provider for a MrsImage data resource. This method should be * called to access a MrsImage (an image ingested into MrGeo) as well as * metadata about a MrsImage. Do not call this method from code that runs on * the data node size of Hadoop. Use the other signature that accepts a * Configuration parameter in that case. * * TODO: Add information about how it chooses the appropriate provider for the * name passed in. * * @param name * @param accessMode * @return * @throws DataProviderNotFound */ public static MrsImageDataProvider getMrsImageDataProvider(final String name, AccessMode accessMode, ProviderProperties props) throws DataProviderNotFound { return getMrsImageDataProvider(name, accessMode, getBasicConfig(), props); } public static MrsImageDataProvider getMrsImageDataProviderNoCache(final String name, AccessMode accessMode, ProviderProperties props) throws DataProviderNotFound { try { return new MrsImageLoader(name, accessMode, getBasicConfig(), props).call(); } catch (Exception e) { throw new DataProviderNotFound("Error loading " + name, e); } } /** * Create a data provider for a MrsImage data resource. This method should be * called to access a MrsImage (an image ingested into MrGeo) as well as * metadata about a MrsImage. Call this method when you need to get a data * provider from within a mapper or reducer or any other functionality that * runs on the data node side of Hadoop. * * TODO: Add information about how it chooses the appropriate provider for the * name passed in. 
* * @param name * @param accessMode * @param conf * @return * @throws DataProviderNotFound */ public static MrsImageDataProvider getMrsImageDataProvider(final String name, AccessMode accessMode, final Configuration conf) throws DataProviderNotFound { return getMrsImageDataProvider(name, accessMode, conf, loadProviderPropertiesFromConfig(conf)); } private static MrsImageDataProvider getMrsImageDataProvider(final String name, AccessMode accessMode, final Configuration conf, final ProviderProperties providerProperties) throws DataProviderNotFound { try { // Make sure that image resources are cached uniquely by user String cacheKey = getResourceCacheKey(name, conf, providerProperties); // If a resource was already accessed in read mode, and then again in // OVERWRITE or WRITE mode, then force the cache to re-load the resource // to execute validation beforehand if (accessMode == AccessMode.OVERWRITE || accessMode == AccessMode.WRITE) { mrsImageProviderCache.invalidate(cacheKey); } if (log.isDebugEnabled()) { log.debug("Loading from mrsImageProviderCache"); log.debug(" cacheKey: {}", cacheKey); log.debug(" name: {}", name); log.debug(" accessMode: {}", accessMode.name()); log.debug(" conf: {}", conf); log.debug(" provider properties: {}", providerProperties); } MrsImageLoader loader = new MrsImageLoader(name, accessMode, conf, providerProperties); return mrsImageProviderCache.get(cacheKey, loader); // return mrsImageProviderCache.get(cacheKey, // new MrsImageLoader(name, accessMode, conf, providerProperties)); } catch (ExecutionException e) { if (e.getCause() instanceof DataProviderNotFound) { throw (DataProviderNotFound) e.getCause(); } throw new DataProviderNotFound(e); } } /** * Create a data provider for a vector data resource. This method should be * called to access a vector as well as metadata about a vector. 
* * @param name * @return * @throws DataProviderNotFound */ public static VectorDataProvider getVectorDataProvider(final String name, AccessMode accessMode, ProviderProperties providerProperties) throws DataProviderNotFound { return getVectorDataProvider(name, accessMode, getBasicConfig(), providerProperties); } public static VectorDataProvider getVectorDataProvider(final String name, AccessMode accessMode, final Configuration conf) throws DataProviderNotFound { return getVectorDataProvider(name, accessMode, conf, loadProviderPropertiesFromConfig(conf)); } private static VectorDataProvider getVectorDataProvider(final String name, AccessMode accessMode, final Configuration conf, final ProviderProperties providerProperties) throws DataProviderNotFound { try { // Make sure that vector resources are cached uniquely by user String cacheKey = getResourceCacheKey(name, conf, providerProperties); // If a resource was already accessed in read mode, and then again in // OVERWRITE or WRITE mode, then force the cache to re-load the resource // to execute validation beforehand if (accessMode == AccessMode.OVERWRITE || accessMode == AccessMode.WRITE) { vectorProviderCache.invalidate(cacheKey); } return vectorProviderCache.get(cacheKey, new VectorLoader(name, accessMode, conf, providerProperties)); } catch (ExecutionException e) { if (e.getCause() instanceof DataProviderNotFound) { throw (DataProviderNotFound) e.getCause(); } throw new DataProviderNotFound(e); } } /** * Previously requested data providers are stored in a cache to speed the process * of repeated requests for a data provider for the same resource. Call this * method to force the data providers to be newly created the next time they are * requested. */ public static void invalidateCache() { adHocProviderCache.invalidateAll(); mrsImageProviderCache.invalidateAll(); vectorProviderCache.invalidateAll(); } /** * Similar to invalidateCache(), except it invalidates a specific resource instead * of all resources. 
* @param resource the cache key to invalidate; a null or empty value is ignored
   */
  public static void invalidateCache(final String resource) {
    if (resource == null || resource.isEmpty()) {
      return;
    }
    // NOTE(review): the provider caches are populated under the key returned by
    // getResourceCacheKey(name, conf, providerProperties); confirm that the raw
    // resource name used here actually matches those keys.
    adHocProviderCache.invalidate(resource);
    mrsImageProviderCache.invalidate(resource);
    vectorProviderCache.invalidate(resource);
    log.debug("invalidating cache: {}", resource);
  }

  /**
   * Deletes the specified resource. The resource is looked up (in OVERWRITE mode)
   * as an image first, then as ad hoc data, then as a vector; the first provider
   * that is non-null performs the delete and its cache entry is invalidated.
   *
   * NOTE(review): if a lookup throws DataProviderNotFound instead of returning
   * null, the later fallbacks are never reached - confirm against the loaders.
   *
   * @param resource           the resource to delete
   * @param providerProperties provider properties passed to the provider lookups
   * @throws IOException if a provider lookup or the delete itself fails
   */
  public static void delete(final String resource,
      final ProviderProperties providerProperties) throws IOException {
    final MrsImageDataProvider mrsImageProvider =
        getMrsImageDataProvider(resource, AccessMode.OVERWRITE, providerProperties);
    if (mrsImageProvider != null) {
      mrsImageProvider.delete();
      mrsImageProviderCache.invalidate(resource);
      return;
    }

    final AdHocDataProvider adHocProvider =
        getAdHocDataProvider(resource, AccessMode.OVERWRITE, providerProperties);
    if (adHocProvider != null) {
      adHocProvider.delete();
      adHocProviderCache.invalidate(resource);
      return;
    }

    final VectorDataProvider vectorProvider =
        getVectorDataProvider(resource, AccessMode.OVERWRITE, providerProperties);
    if (vectorProvider != null) {
      vectorProvider.delete();
      vectorProviderCache.invalidate(resource);
    }
  }

  /**
   * Discovers the ad hoc, image and vector provider factories through the Java
   * ServiceLoader and registers the valid ones by prefix. Each kind of factory is
   * only discovered while its registry is still null, so repeated calls do not
   * repeat the discovery. The preferred providers are re-resolved on every call.
   *
   * An image or vector factory that throws during set-up is logged and skipped.
   * There is no such protection around the ad hoc factories, so an exception from
   * one of them propagates to the caller.
   *
   * @param conf configuration handed to each valid factory's initialize() and used
   *             when resolving the preferred providers; may be null
   * @throws DataProviderException declared for failures while initializing factories
   */
  protected static void initialize(final Configuration conf) throws DataProviderException {
    if (adHocProviderFactories == null) {
      log.info("Initializing ad hoc provider factories");
      adHocProviderFactories = new HashMap<>();
      // Find the ad hoc provider factories
      final ServiceLoader<AdHocDataProviderFactory> dataProviderLoader =
          ServiceLoader.load(AdHocDataProviderFactory.class);
      for (final AdHocDataProviderFactory dp : dataProviderLoader) {
        if (configSettings != null) {
          dp.setConfiguration(configSettings);
        }
        if (dp.isValid()) {
          log.info("Found ad hoc data provider factory {}", dp.getClass().getName());
          adHocProviderFactories.put(dp.getPrefix(), dp);
          dp.initialize(conf);
        } else {
          log.info("Skipping ad hoc data provider {} because isValid returned false",
              dp.getClass().getName());
        }
      }
    }

    if (mrsImageProviderFactories == null) {
      log.info("Initializing image provider factories");
      mrsImageProviderFactories = new HashMap<>();
      // Find the image provider factories
      final ServiceLoader<MrsImageDataProviderFactory> dataProviderLoader =
          ServiceLoader.load(MrsImageDataProviderFactory.class);
      for (final MrsImageDataProviderFactory dp : dataProviderLoader) {
        try {
          if (configSettings != null) {
            dp.setConfiguration(configSettings);
          }
          if (dp.isValid()) {
            log.info("Found mrs image data provider factory {} {}", dp.getPrefix(),
                dp.getClass().getName());
            mrsImageProviderFactories.put(dp.getPrefix(), dp);
            dp.initialize(conf);
          } else {
            log.info("Skipping mrs image data provider {} because isValid returned false",
                dp.getClass().getName());
          }
        } catch (Exception e) {
          // Deliberate best effort: a failing factory is reported and skipped.
          // If the failure came from initialize(), the factory has already been
          // put in the registry above.
          log.warn("Ignoring " + dp.getClass().getName(), e);
        }
      }
    }

    if (vectorProviderFactories == null) {
      log.info("Initializing vector provider factories");
      final boolean debugEnabled = log.isDebugEnabled();
      vectorProviderFactories = new HashMap<>();
      if (debugEnabled) {
        log.debug("Finding vector provider factories");
      }
      // Find the vector provider factories
      final ServiceLoader<VectorDataProviderFactory> dataProviderLoader =
          ServiceLoader.load(VectorDataProviderFactory.class);
      int count = 0;
      for (final VectorDataProviderFactory dp : dataProviderLoader) {
        try {
          if (debugEnabled) {
            log.debug("Checking if vector factory is valid: " + dp.getClass().getName()
                + " with config " + ((conf == null) ? "null" : "not null"));
          }
          if (configSettings != null) {
            dp.setConfiguration(configSettings);
          }
          if (dp.isValid()) {
            // The debug guards are kept so getPrefix() is only called for logging
            // when debug output is actually enabled
            if (debugEnabled) {
              log.debug("Factory " + dp.getClass().getName() + " is valid, uses prefix: "
                  + dp.getPrefix());
            }
            vectorProviderFactories.put(dp.getPrefix(), dp);
            dp.initialize(conf);
            count++;
          } else {
            if (debugEnabled) {
              log.debug("Factory " + dp.getClass().getName() + " is NOT valid, uses prefix: "
                  + dp.getPrefix());
            }
            log.info("Skipping vector data provider {} because isValid returned false",
                dp.getClass().getName());
          }
        } catch (Exception e) {
          log.warn("Skipping vector factory provider " + dp.getClass().getName()
              + " due to exception", e);
        }
      }
      if (count == 0) {
        log.warn("No vector factory providers were found");
      }
    }

    findPreferredProvider(conf);
  }

  /**
   * Adds the dependencies of every registered provider factory class to the
   * configuration through DependencyLoader. Registries that have not been
   * initialized yet (still null) are skipped; this method does not call initialize().
   *
   * @param conf the configuration the dependencies are added to
   * @throws IOException if DependencyLoader fails
   */
  public static void addDependencies(final Configuration conf) throws IOException {
    if (adHocProviderFactories != null) {
      for (final AdHocDataProviderFactory dp : adHocProviderFactories.values()) {
        DependencyLoader.addDependencies(conf, dp.getClass());
      }
    }
    if (mrsImageProviderFactories != null) {
      for (final MrsImageDataProviderFactory dp : mrsImageProviderFactories.values()) {
        DependencyLoader.addDependencies(conf, dp.getClass());
      }
    }
    if (vectorProviderFactories != null) {
      for (final VectorDataProviderFactory dp : vectorProviderFactories.values()) {
        DependencyLoader.addDependencies(conf, dp.getClass());
      }
    }
  }

  /**
   * Collects the dependencies of every registered provider factory class. The
   * factories are initialized with the basic configuration first.
   *
   * @return the union of the dependencies reported by DependencyLoader for each
   *         factory class; empty when no factory reports any
   * @throws IOException if initialization or DependencyLoader fails
   */
  public static Set<String> getDependencies() throws IOException {
    log.debug("Getting dependencies for all providers");
    initialize(getBasicConfig());

    final Set<String> dependencies = new HashSet<>();
    if (adHocProviderFactories != null) {
      for (final AdHocDataProviderFactory dp : adHocProviderFactories.values()) {
        log.debug("Getting dependencies for {}", dp.getClass().getName());
        final Set<String> d = DependencyLoader.getDependencies(dp.getClass());
        if (d != null) {
          dependencies.addAll(d);
        }
      }
    }
    if (mrsImageProviderFactories != null) {
      for (final MrsImageDataProviderFactory dp : mrsImageProviderFactories.values()) {
        log.debug("Getting dependencies for {}", dp.getClass().getName());
        final Set<String> d = DependencyLoader.getDependencies(dp.getClass());
        if (d != null) {
          dependencies.addAll(d);
        }
      }
    }
    if (vectorProviderFactories != null) {
      for (final VectorDataProviderFactory dp : vectorProviderFactories.values()) {
        log.debug("Getting dependencies for {}", dp.getClass().getName());
        final Set<String> d = DependencyLoader.getDependencies(dp.getClass());
        if (d != null) {
          dependencies.addAll(d);
        }
      }
    }
    return dependencies;
  }

  /**
   * Resolves the preferred ad hoc, image and vector provider names. Each name is
   * looked up with findValue(); when nothing is configured, the prefix of the
   * first registered factory (if any) is used and recorded with setValue().
   *
   * @param conf configuration consulted and updated for the preferred names; may be null
   */
  private static void findPreferredProvider(Configuration conf) {
    preferredAdHocProviderName =
        findValue(conf, PREFERRED_ADHOC_PROVIDER_NAME, PREFERRED_ADHOC_PROPERTYNAME);
    if (preferredAdHocProviderName == null) {
      // no preferred provider, use the 1st one...
      final Iterator<AdHocDataProviderFactory> factories =
          adHocProviderFactories.values().iterator();
      if (factories.hasNext()) {
        preferredAdHocProviderName = factories.next().getPrefix();
        setValue(preferredAdHocProviderName, conf, PREFERRED_ADHOC_PROVIDER_NAME,
            PREFERRED_ADHOC_PROPERTYNAME);
        log.info("Making {} preferred ad hoc provider ", preferredAdHocProviderName);
      }
    } else {
      log.debug("Using preferred ad hoc provider {}", preferredAdHocProviderName);
    }

    preferredImageProviderName =
        findValue(conf, PREFERRED_MRSIMAGE_PROVIDER_NAME, PREFERRED_MRSIMAGE_PROPERTYNAME);
    if (preferredImageProviderName == null) {
      // no preferred provider, use the 1st one...
      final Iterator<MrsImageDataProviderFactory> factories =
          mrsImageProviderFactories.values().iterator();
      if (factories.hasNext()) {
        preferredImageProviderName = factories.next().getPrefix();
        setValue(preferredImageProviderName, conf, PREFERRED_MRSIMAGE_PROVIDER_NAME,
            PREFERRED_MRSIMAGE_PROPERTYNAME);
        log.info("Making {} preferred image provider ", preferredImageProviderName);
      }
    } else {
      log.debug("Using preferred image provider {}", preferredImageProviderName);
    }

    preferredVectorProviderName =
        findValue(conf, PREFERRED_VECTOR_PROVIDER_NAME, PREFERRED_VECTOR_PROPERTYNAME);
    if (preferredVectorProviderName == null) {
      // no preferred provider, use the 1st one...
      final Iterator<VectorDataProviderFactory> factories =
          vectorProviderFactories.values().iterator();
      if (factories.hasNext()) {
        preferredVectorProviderName = factories.next().getPrefix();
        setValue(preferredVectorProviderName, conf, PREFERRED_VECTOR_PROVIDER_NAME,
            PREFERRED_VECTOR_PROPERTYNAME);
        log.info("Making {} preferred vector provider ", preferredVectorProviderName);
      }
    } else {
      log.debug("Using preferred vector provider {}", preferredVectorProviderName);
    }
  }

  /**
   * Looks up a setting, trying in order: the configuration entry confName, the
   * MrGeo property propName, and finally the generic preferred-provider property.
   *
   * @param conf     configuration to consult first; may be null
   * @param confName key to read from the configuration
   * @param propName key to read from the MrGeo properties
   * @return the first value found, or null when none of the sources has one
   */
  private static String findValue(final Configuration conf, final String confName,
      final String propName) {
    String name = null;
    // 1st look in the config
    if (conf != null) {
      name = conf.get(confName, null);
    }
    if (name == null) {
      // NOTE(review): assumes MrGeoProperties.getInstance() returns the same
      // Properties object on every call, so it is fetched only once here.
      final Properties mp = MrGeoProperties.getInstance();
      if (mp != null) {
        name = mp.getProperty(propName, null);
        // look for the generic name
        if (name == null) {
          name = mp.getProperty(PREFERRED_PROPERTYNAME, null);
        }
      }
    }
    return name;
  }

  /**
   * Stores a setting in the configuration (when one is given) and in the MrGeo
   * properties (when available).
   *
   * @param value    the value to store
   * @param conf     configuration to update under confName; may be null
   * @param confName key used in the configuration
   * @param propName key used in the MrGeo properties
   */
  private static void setValue(String value, final Configuration conf, final String confName,
      final String propName) {
    if (conf != null) {
      conf.set(confName, value);
    }
    final Properties mp = MrGeoProperties.getInstance();
    if (mp != null) {
      mp.setProperty(propName, value);
    }
  }

  /**
   * Extracts the provider prefix from a resource name, i.e. the text before the
   * first prefix delimiter.
   *
   * @param name the resource name to inspect; must not be null
   * @return the prefix, or null when the name has no delimiter, starts with the
   *         delimiter, or looks like a URL
   */
  protected static String getPrefix(String name) {
    final int ndx = name.indexOf(PREFIX_CHAR);
    if (ndx <= 0) {
      return null;
    }

    // 1st check if the system sees the name as a valid URL/URI
    try {
      new URL(name).toURI();
      return null;
    } catch (URISyntaxException | MalformedURLException ignored) {
      // Not a URL the system knows about; fall through to the "://" check
    }

    // Now check for the :// part (usually for URIs whose scheme is not registered
    // with the system, like hdfs:// or s3://).
    final int afterPrefixIndex = ndx + PREFIX_CHAR.length();
    // If the prefix character is a colon, we need to make sure that the name is
    // not actually a URL. So if the name has at least two more characters, and
    // the two characters immediately following the colon are //, then we return
    // no prefix because we assume it's a URL. The length test used to demand four
    // trailing characters, which made short URLs such as "s3://b" look prefixed.
    if (PREFIX_CHAR.equals(":")
        && name.length() >= afterPrefixIndex + 2
        && name.charAt(afterPrefixIndex) == '/'
        && name.charAt(afterPrefixIndex + 1) == '/') {
      return null;
    }
    return name.substring(0, ndx);
  }

  /**
   * Returns the shared basic configuration, creating it through HadoopUtils the
   * first time it is needed.
   *
   * @return the basic configuration
   */
  private static Configuration getBasicConfig() {
    if (basicConf == null) {
      basicConf = HadoopUtils.createConfiguration();
    }
    return basicConf;
  }
}