Java tutorial: DistributedCacheManager
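The listing below is from ScaleOut hServer's Hadoop integration layer (package com.scaleoutsoftware.soss.hserver.hadoop). The class is a modified copy of LocalDistributedCacheManager from Apache Hadoop 2.2.0: it localizes the job's distributed-cache entries, symlinks them into the working directory, and records which localized files belong on the classpath. The ScaleOut-specific deviations are marked with "CODE CHANGE FROM ORIGINAL FILE" comments: jar resources are skipped (they are distributed through the invocation grid, the "IG", instead), and only worker 0 performs the localization while the remaining workers wait on a synchronization barrier.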
package com.scaleoutsoftware.soss.hserver.hadoop;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicLong;

import com.scaleoutsoftware.soss.client.pmi.InvocationWorker;
import com.scaleoutsoftware.soss.hserver.InvocationParameters;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.FSDownload;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * This class is based on LocalDistributedCacheManager from Apache Hadoop 2.2.0.
 */
@SuppressWarnings("deprecation")
class DistributedCacheManager {
    public static final Log LOG = LogFactory.getLog(DistributedCacheManager.class);

    private List<String> localArchives = new ArrayList<String>();
    private List<String> localFiles = new ArrayList<String>();
    private List<String> localClasspaths = new ArrayList<String>();
    private List<File> symlinksCreated = new ArrayList<File>();

    private boolean setupCalled = false;

    private final static int SYNCHRONIZATION_WAIT_MS = 10000;
    private final static int WAIT_GRANULARITY_MS = 10;
    private final static String ACTION_NAME = "Updating Hadoop distributed cache.";

    /**
     * Set up the distributed cache by localizing the resources, and updating
     * the configuration with references to the localized resources.
     *
     * @param conf job configuration
     * @throws IOException
     */
    public void setup(Configuration conf) throws IOException {
        // If we are not the 0th worker, wait for the 0th worker to set up the cache
        if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
            try {
                InvocationWorker.getSynchronizationBarrier().waitForComplete(
                        ACTION_NAME, SYNCHRONIZATION_WAIT_MS, WAIT_GRANULARITY_MS);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            return;
        }

        File workDir = new File(System.getProperty("user.dir"));

        // Generate YARN local resource objects corresponding to the distributed
        // cache configuration
        Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
        MRApps.setupDistributedCache(conf, localResources);

        // CODE CHANGE FROM ORIGINAL FILE:
        // We need to clear the resources from jar files, since they are distributed through the IG.
        Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
        while (iterator.hasNext()) {
            Entry<String, LocalResource> entry = iterator.next();
            if (entry.getKey().endsWith(".jar")) {
                iterator.remove();
            }
        }

        // Generating unique numbers for FSDownload.
        AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

        // Find which resources are to be put on the local classpath
        Map<String, Path> classpaths = new HashMap<String, Path>();
        Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
        if (archiveClassPaths != null) {
            for (Path p : archiveClassPaths) {
                FileSystem remoteFS = p.getFileSystem(conf);
                p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(),
                        remoteFS.getWorkingDirectory()));
                classpaths.put(p.toUri().getPath().toString(), p);
            }
        }
        Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
        if (fileClassPaths != null) {
            for (Path p : fileClassPaths) {
                FileSystem remoteFS = p.getFileSystem(conf);
                p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(),
                        remoteFS.getWorkingDirectory()));
                classpaths.put(p.toUri().getPath().toString(), p);
            }
        }

        // Localize the resources
        LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
        FileContext localFSFileContext = FileContext.getLocalFSFileContext();
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        ExecutorService exec = null;
        try {
            ThreadFactory tf = new ThreadFactoryBuilder()
                    .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
            exec = Executors.newCachedThreadPool(tf);
            Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
            Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();

            // Submit a download task for each resource; FSDownload copies it into a
            // unique subdirectory of the local destination path.
            for (LocalResource resource : localResources.values()) {
                Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                        new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())),
                        resource);
                Future<Path> future = exec.submit(download);
                resourcesToPaths.put(resource, future);
            }

            // Wait for each download to finish, symlink the localized resource into
            // the working directory, and record it as an archive or a file.
            for (Entry<String, LocalResource> entry : localResources.entrySet()) {
                LocalResource resource = entry.getValue();
                Path path;
                try {
                    path = resourcesToPaths.get(resource).get();
                } catch (InterruptedException e) {
                    throw new IOException(e);
                } catch (ExecutionException e) {
                    throw new IOException(e);
                }
                String pathString = path.toUri().toString();
                String link = entry.getKey();
                String target = new File(path.toUri()).getPath();
                symlink(workDir, target, link);

                if (resource.getType() == LocalResourceType.ARCHIVE) {
                    localArchives.add(pathString);
                } else if (resource.getType() == LocalResourceType.FILE) {
                    localFiles.add(pathString);
                } else if (resource.getType() == LocalResourceType.PATTERN) {
                    // PATTERN is not currently used in local mode
                    throw new IllegalArgumentException("Resource type PATTERN is not " +
                            "implemented yet. " + resource.getResource());
                }
                Path resourcePath;
                try {
                    resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
                } catch (URISyntaxException e) {
                    throw new IOException(e);
                }
                LOG.info(String.format("Localized %s as %s", resourcePath, path));
                String cp = resourcePath.toUri().getPath();
                if (classpaths.keySet().contains(cp)) {
                    localClasspaths.add(path.toUri().getPath().toString());
                }
            }
        } finally {
            if (exec != null) {
                exec.shutdown();
            }
        }

        // Update the configuration object with localized data.
        if (!localArchives.isEmpty()) {
            conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                    StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
        }
        if (!localFiles.isEmpty()) {
            // Note: the array is sized with localFiles.size(); the original Hadoop code
            // mistakenly used localArchives.size() here.
            conf.set(MRJobConfig.CACHE_LOCALFILES,
                    StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
        }
        setupCalled = true;

        // If we are the 0th worker, signal that the action is complete
        if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
            try {
                InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Utility method for creating a symlink and warning on errors.
     *
     * If link is null, does nothing.
     */
    private void symlink(File workDir, String target, String link) throws IOException {
        if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);

            // CODE CHANGE FROM ORIGINAL FILE, BUG FIX:
            //
            // If the cleanup of the previous job failed for some reason, we can have a lingering
            // symlink pointing either to an obsolete file (in that case flink.exists() == true) or
            // to a non-existent file (flink.exists() == false). In the second case, the original
            // code tried to create the symlink anyway, causing an "already exists" error. In the
            // first case, this method used to do nothing without logging it, which effectively
            // left the old symlink in place, leading to elusive bugs.
            //
            // Changes:
            // 1. Try to delete the symlink, and log if there was a symlink to delete
            //    (it means something went wrong with cleanup).
            // 2. Remove the if (!flink.exists()) check before creating the symlink.
            if (flink.delete()) {
                LOG.warn(String.format("Symlink already existed, deleting: %s <- %s", target, link));
            }
            LOG.info(String.format("Creating symlink: %s <- %s", target, link));
            if (0 != FileUtil.symLink(target, link)) {
                LOG.warn(String.format("Failed to create symlink: %s <- %s", target, link));
            } else {
                symlinksCreated.add(new File(link));
            }
        }
    }

    /**
     * Are there resources that should be added to the classpath?
     * Should be called after setup().
     */
    public boolean hasLocalClasspaths() {
        if (!setupCalled) {
            throw new IllegalStateException("hasLocalClasspaths() should be called after setup()");
        }
        return !localClasspaths.isEmpty();
    }

    /**
     * Creates a class loader that includes the designated
     * files and archives.
     */
    public ClassLoader makeClassLoader(final ClassLoader parent) throws MalformedURLException {
        final URL[] urls = new URL[localClasspaths.size()];
        for (int i = 0; i < localClasspaths.size(); ++i) {
            urls[i] = new File(localClasspaths.get(i)).toURI().toURL();
            LOG.info(urls[i]);
        }
        return AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
            @Override
            public ClassLoader run() {
                return new URLClassLoader(urls, parent);
            }
        });
    }

    public void close() throws IOException {
        // TODO: commented out for now; interacts with other workers on the same host
        // for (File symlink : symlinksCreated) {
        //     if (!symlink.delete()) {
        //         LOG.warn("Failed to delete symlink created by the local job runner: " +
        //                 symlink);
        //     }
        // }
        // FileContext localFSFileContext = FileContext.getLocalFSFileContext();
        // for (String archive : localArchives) {
        //     localFSFileContext.delete(new Path(archive), true);
        // }
        // for (String file : localFiles) {
        //     localFSFileContext.delete(new Path(file), true);
        // }
    }
}
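For orientation, here is a minimal sketch of how the surrounding job-runner code might drive this class. It is not part of the original file: the driver class and its main method are hypothetical, the class must live in the same package because DistributedCacheManager is package-private, and it assumes the Configuration already carries the job's distributed-cache entries and a local directory setting.

package com.scaleoutsoftware.soss.hserver.hadoop;

import org.apache.hadoop.conf.Configuration;

// Hypothetical driver illustrating the intended call sequence:
// setup() localizes the cache (worker 0 does the work, others wait on the
// barrier), hasLocalClasspaths() says whether a dedicated class loader is
// needed, and close() is the cleanup hook.
class DistributedCacheManagerExample {
    public static void main(String[] args) throws Exception {
        // Assumes mapreduce.job.cache.files / .archives and the local
        // directory (MRConfig.LOCAL_DIR) were populated at job submission.
        Configuration conf = new Configuration();

        DistributedCacheManager cacheManager = new DistributedCacheManager();
        cacheManager.setup(conf);

        // Build a class loader over the localized classpath entries, if any.
        ClassLoader taskClassLoader = cacheManager.hasLocalClasspaths()
                ? cacheManager.makeClassLoader(Thread.currentThread().getContextClassLoader())
                : Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(taskClassLoader);

        // ... run the task here ...

        cacheManager.close(); // symlink cleanup is currently a no-op (see TODO above)
    }
}

Note that setup() must be called before hasLocalClasspaths(), which otherwise throws IllegalStateException, and that on a multi-worker host only worker 0 actually downloads the resources; the others return from setup() once the barrier reports completion.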