// Java tutorial: Spring for Apache Hadoop — declarative DistributedCache configuration
/*
 * Copyright 2011-2013 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.hadoop.fs;

import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.beans.factory.FactoryBean;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.core.io.Resource;
import org.springframework.data.hadoop.fs.DistributedCacheFactoryBean.CacheEntry.EntryType;
import org.springframework.data.hadoop.util.VersionUtils;
import org.springframework.util.Assert;
import org.springframework.util.ObjectUtils;
import org.springframework.util.ReflectionUtils;
import org.springframework.util.StringUtils;

/**
 * Factory for easy declarative configuration of a {@link DistributedCache}.
 * <p>
 * All cache registration happens eagerly in {@link #afterPropertiesSet()}: each
 * configured {@link CacheEntry} is resolved against HDFS and registered with the
 * given Hadoop {@link Configuration} according to its {@link CacheEntry.EntryType}.
 * The {@link DistributedCache} instance returned by {@link #getObject()} is only a
 * handle; the actual state lives in the {@code Configuration}.
 *
 * @author Costin Leau
 * @author Thomas Risberg
 */
public class DistributedCacheFactoryBean implements InitializingBean, FactoryBean<DistributedCache> {

	/**
	 * Class describing an entry of the distributed cache.
	 *
	 * @author Costin Leau
	 */
	public static class CacheEntry {

		/**
		 * A distributed cache entry type.
		 *
		 * @author Costin Leau
		 */
		public enum EntryType {
			/** Local entry */
			LOCAL,
			/** Cache wide entry*/
			CACHE,
			/** Classpath entry*/
			CP
		}

		// Immutable pair: how to register the entry, and the resource pattern/URI to register.
		final EntryType type;
		final String value;

		/**
		 * Constructs a new <code>CacheEntry</code> instance.
		 *
		 * @param type entry type
		 * @param value entry value
		 */
		public CacheEntry(EntryType type, String value) {
			this.type = type;
			this.value = value;
		}
	}

	// Once-per-classloader latch for the non-':' path separator warning below.
	// NOTE(review): mutable static without synchronization — worst case the warning
	// is logged more than once under concurrent init; benign.
	private static boolean FILE_SEPARATOR_WARNING = true;

	private Configuration conf;
	private DistributedCache ds;
	private FileSystem fs;
	private boolean createSymlink = false;
	private Collection<CacheEntry> entries;

	/**
	 * Returns the {@link DistributedCache} handle created during
	 * {@link #afterPropertiesSet()}; {@code null} if not yet initialized.
	 */
	@Override
	public DistributedCache getObject() throws Exception {
		return ds;
	}

	@Override
	public Class<?> getObjectType() {
		return DistributedCache.class;
	}

	/** Always a singleton: one cache handle per factory instance. */
	@Override
	public boolean isSingleton() {
		return true;
	}

	/**
	 * Resolves every configured entry against the file system and registers it
	 * with the Hadoop {@link Configuration}.
	 *
	 * @throws Exception if resource resolution, reflection, or Hadoop registration fails
	 */
	@Override
	public void afterPropertiesSet() throws Exception {
		Assert.notNull(conf, "A Hadoop configuration is required");
		Assert.notEmpty(entries, "No entries specified");

		// fall back to system discovery
		if (fs == null) {
			fs = FileSystem.get(conf);
		}

		ds = new DistributedCache();

		if (createSymlink) {
			DistributedCache.createSymlink(conf);
		}

		HdfsResourceLoader loader = new HdfsResourceLoader(conf);
		// Classpath entries are joined with ':' by Hadoop; warn when the platform
		// separator differs (e.g. Windows ';') — see HADOOP-9123.
		boolean warnCpEntry = !":".equals(System.getProperty("path.separator"));

		try {
			for (CacheEntry entry : entries) {
				// entry.value may be a pattern; a single entry can resolve to many resources
				Resource[] resources = loader.getResources(entry.value);
				if (!ObjectUtils.isEmpty(resources)) {
					for (Resource resource : resources) {
						// assumes the HDFS loader only ever yields HdfsResource instances — TODO confirm
						HdfsResource res = (HdfsResource) resource;
						URI uri = res.getURI();
						String path = getPathWithFragment(uri);
						String defaultLink = resource.getFilename();
						// Archive detection by extension only; anything else is treated as a plain file.
						boolean isArchive = (defaultLink.endsWith(".tgz") || defaultLink.endsWith(".tar")
								|| defaultLink.endsWith(".tar.gz") || defaultLink.endsWith(".zip"));

						switch (entry.type) {
						case CP:
							// Path does not handle fragments so use the URI instead
							Path p = new Path(URI.create(path));

							if (FILE_SEPARATOR_WARNING && warnCpEntry) {
								LogFactory.getLog(DistributedCacheFactoryBean.class).warn(
										"System path separator is not ':' - this will likely cause invalid classpath entries within the DistributedCache. See the docs and HADOOP-9123 for more information.");
								// show the warning once per CL
								FILE_SEPARATOR_WARNING = false;
							}

							if (isArchive) {
								DistributedCache.addArchiveToClassPath(p, conf, fs);
							}
							else {
								DistributedCache.addFileToClassPath(p, conf, fs);
							}
							break;

						case LOCAL:
							// addLocalArchives/addLocalFiles exist only on Hadoop 1.x, hence the
							// reflective lookup; on 2.x there is no equivalent yet (see TODOs).
							// NOTE(review): findMethod returns null if absent — would NPE on invoke;
							// presumably unreachable on 1.x where the methods exist.
							if (isArchive) {
								if (VersionUtils.isHadoop2X()) {
									// TODO - Need to figure out how to add local archive
								}
								else {
									Method addLocalArchives = ReflectionUtils.findMethod(DistributedCache.class,
											"addLocalArchives", Configuration.class, String.class);
									addLocalArchives.invoke(null, conf, path);
								}
							}
							else {
								if (VersionUtils.isHadoop2X()) {
									// TODO - Need to figure out how to add local files
								}
								else {
									Method addLocalFiles = ReflectionUtils.findMethod(DistributedCache.class,
											"addLocalFiles", Configuration.class, String.class);
									addLocalFiles.invoke(null, conf, path);
								}
							}
							break;

						case CACHE:
							// No explicit symlink name given: default the fragment to the file name.
							if (!path.contains("#")) {
								// use the path to avoid adding the host:port into the uri
								uri = URI.create(path + "#" + defaultLink);
							}
							if (isArchive) {
								DistributedCache.addCacheArchive(uri, conf);
							}
							else {
								DistributedCache.addCacheFile(uri, conf);
							}
							break;
						}
					}
				}
			}
		} finally {
			// always release the loader's file-system handle
			loader.close();
		}
	}

	/**
	 * Sets the entries to be added to the distributed cache.
	 *
	 * @param entries The entries to set.
	 */
	public void setEntries(Collection<CacheEntry> entries) {
		this.entries = entries;
	}

	/**
	 * Sets the local entries to be added to the distributed cache.
	 *
	 * @param resources The entries to set.
	 */
	public void setLocalEntries(Collection<Resource> resources) {
		setEntries(EntryType.LOCAL, resources);
	}

	/**
	 * Sets the cache entries to be added to the distributed cache.
	 *
	 * @param resources The entries to set.
	 */
	public void setCacheEntries(Collection<Resource> resources) {
		setEntries(EntryType.CACHE, resources);
	}

	/**
	 * Sets the class-path entries to be added to the distributed cache.
	 *
	 * @param resources The entries to set.
	 */
	public void setClassPathEntries(Collection<Resource> resources) {
		setEntries(EntryType.CP, resources);
	}

	/**
	 * Converts each resource to a {@link CacheEntry} of the given type, keyed by
	 * its URI. A {@code null} collection clears the entries.
	 *
	 * @param cp entry type to assign to every resource
	 * @param resources resources to convert; may be {@code null}
	 * @throws IllegalArgumentException if a resource URI cannot be resolved
	 */
	private void setEntries(EntryType cp, Collection<Resource> resources) {
		if (resources == null) {
			setEntries(null);
		}
		else {
			Collection<CacheEntry> entries = new ArrayList<CacheEntry>(resources.size());
			for (Resource resource : resources) {
				try {
					entries.add(new CacheEntry(cp, resource.getURI().toString()));
				} catch (IOException ex) {
					throw new IllegalArgumentException("Cannot resolve resource " + resource, ex);
				}
			}
			setEntries(entries);
		}
	}

	/**
	 * Sets the Hadoop configuration for the cache.
	 *
	 * @param conf The conf to set.
	 */
	public void setConfiguration(Configuration conf) {
		this.conf = conf;
	}

	/**
	 * Sets the Hadoop file system for this cache.
	 *
	 * @param fs File system to set.
	 */
	public void setFileSystem(FileSystem fs) {
		this.fs = fs;
	}

	/**
	 * Indicates whether to create symlinks or not.
	 *
	 * @param createSymlink whether to create symlinks or not.
	 */
	public void setCreateSymlink(boolean createSymlink) {
		this.createSymlink = createSymlink;
	}

	/**
	 * Returns the URI path, re-attaching the {@code #fragment} (symlink name)
	 * that {@link URI#getPath()} strips off.
	 *
	 * @param uri source URI
	 * @return path, plus {@code "#fragment"} when the URI carries one
	 */
	private static String getPathWithFragment(URI uri) {
		String path = uri.getPath();
		String fragment = uri.getFragment();
		if (StringUtils.hasText(fragment)) {
			path = path + "#" + fragment;
		}
		return path;
	}
}