com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java Source code

Introduction

Here is the source code for com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java
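
This class localizes Hadoop DistributedCache resources for ScaleOut hServer invocation workers. It is derived from LocalDistributedCacheManager in Apache Hadoop 2.2.0, with two notable changes: jar resources are skipped during localization (jars are distributed through the invocation grid instead), and stale symlinks left behind by a failed cleanup are deleted before new ones are created. When several workers run, only the 0th worker localizes the resources; the other workers wait on a synchronization barrier until it signals completion.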

Source

package com.scaleoutsoftware.soss.hserver.hadoop;
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicLong;

import com.scaleoutsoftware.soss.client.pmi.InvocationWorker;
import com.scaleoutsoftware.soss.hserver.InvocationParameters;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.FSDownload;

import com.google.common.collect.Maps;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * This class is based on LocalDistributedCacheManager from Apache Hadoop 2.2.0
 */
@SuppressWarnings("deprecation")
class DistributedCacheManager {
    public static final Log LOG = LogFactory.getLog(DistributedCacheManager.class);

    private List<String> localArchives = new ArrayList<String>();
    private List<String> localFiles = new ArrayList<String>();
    private List<String> localClasspaths = new ArrayList<String>();

    private List<File> symlinksCreated = new ArrayList<File>();

    private boolean setupCalled = false;

    private final static int SYNCHRONIZATION_WAIT_MS = 10000;
    private final static int WAIT_GRANULARITY_MS = 10;
    private final static String ACTION_NAME = "Updating Hadoop distributed cache.";

    /**
     * Set up the distributed cache by localizing the resources, and updating
     * the configuration with references to the localized resources.
     * @param conf job configuration
     * @throws IOException
     */
    public void setup(Configuration conf) throws IOException {
        //If we are not the 0th worker, wait for the 0th worker to set up the cache
        if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
            try {
                InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                        WAIT_GRANULARITY_MS);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            return;
        }

        File workDir = new File(System.getProperty("user.dir"));

        // Generate YARN local resources objects corresponding to the distributed
        // cache configuration
        Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
        MRApps.setupDistributedCache(conf, localResources);

        //CODE CHANGE FROM ORIGINAL FILE:
        //Remove resources that are jar files, since jars are distributed through the IG.
        //
        Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
        while (iterator.hasNext()) {
            Entry<String, LocalResource> entry = iterator.next();
            if (entry.getKey().endsWith(".jar")) {
                iterator.remove();
            }
        }

        // Generating unique numbers for FSDownload.

        AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

        // Find which resources are to be put on the local classpath
        Map<String, Path> classpaths = new HashMap<String, Path>();
        Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
        if (archiveClassPaths != null) {
            for (Path p : archiveClassPaths) {
                FileSystem remoteFS = p.getFileSystem(conf);
                p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
                classpaths.put(p.toUri().getPath(), p);
            }
        }

        Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
        if (fileClassPaths != null) {
            for (Path p : fileClassPaths) {
                FileSystem remoteFS = p.getFileSystem(conf);
                p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
                classpaths.put(p.toUri().getPath(), p);
            }
        }

        // Localize the resources
        LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
        FileContext localFSFileContext = FileContext.getLocalFSFileContext();
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

        ExecutorService exec = null;
        try {
            ThreadFactory tf = new ThreadFactoryBuilder()
                    .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
            exec = Executors.newCachedThreadPool(tf);
            Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
            Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
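            // Submit one FSDownload task per resource; each download lands in its own
            // uniquely numbered subdirectory under destPath.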
            for (LocalResource resource : localResources.values()) {
                Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                        new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
                Future<Path> future = exec.submit(download);
                resourcesToPaths.put(resource, future);
            }
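            // Wait for each download to finish, symlink the localized copy into the
            // work directory under its cache name, and record it by resource type.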
            for (Entry<String, LocalResource> entry : localResources.entrySet()) {
                LocalResource resource = entry.getValue();
                Path path;
                try {
                    path = resourcesToPaths.get(resource).get();
                } catch (InterruptedException e) {
                    throw new IOException(e);
                } catch (ExecutionException e) {
                    throw new IOException(e);
                }
                String pathString = path.toUri().toString();
                String link = entry.getKey();
                String target = new File(path.toUri()).getPath();
                symlink(workDir, target, link);

                if (resource.getType() == LocalResourceType.ARCHIVE) {
                    localArchives.add(pathString);
                } else if (resource.getType() == LocalResourceType.FILE) {
                    localFiles.add(pathString);
                } else if (resource.getType() == LocalResourceType.PATTERN) {
                    //PATTERN is not currently used in local mode
                    throw new IllegalArgumentException(
                            "Resource type PATTERN is not implemented yet. " + resource.getResource());
                }
                Path resourcePath;
                try {
                    resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
                } catch (URISyntaxException e) {
                    throw new IOException(e);
                }
                LOG.info(String.format("Localized %s as %s", resourcePath, path));
                String cp = resourcePath.toUri().getPath();
                if (classpaths.containsKey(cp)) {
                    localClasspaths.add(path.toUri().getPath());
                }
            }
        } finally {
            if (exec != null) {
                exec.shutdown();
            }
        }
        // Update the configuration object with localized data.
        if (!localArchives.isEmpty()) {
            conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                    StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
        }
        if (!localFiles.isEmpty()) {
            conf.set(MRJobConfig.CACHE_LOCALFILES,
                    StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
        }
        setupCalled = true;

        //If we are the 0th worker, signal that the action is complete
        if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
            try {
                InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

    }

    /**
     * Utility method for creating a symlink and warning on errors.
     *
     * If link is null, does nothing.
     */
    private void symlink(File workDir, String target, String link) throws IOException {
        if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);

            //CODE CHANGE FROM ORIGINAL FILE, BUG FIX:
            //
            //If the cleanup of the previous job failed for some reason, a lingering symlink may remain,
            //pointing either to an obsolete file (in which case flink.exists() == true) or to a
            //non-existent file (flink.exists() == false). In the second case, the original code tried
            //to create the symlink anyway, causing an "already exists" error. In the first case, this
            //method used to do nothing without logging it, which effectively left the old symlink in
            //place, leading to elusive bugs.
            //
            //Changes:
            //1. Try to delete the symlink, and log if there was one to delete (it means the cleanup went wrong).
            //2. Remove the if (!flink.exists()) check before creating the symlink.
            if (flink.delete()) {
                LOG.warn(String.format("Symlink already existed, deleting: %s <- %s", target, link));
            }

            LOG.info(String.format("Creating symlink: %s <- %s", target, link));
            if (0 != FileUtil.symLink(target, link)) {
                LOG.warn(String.format("Failed to create symlink: %s <- %s", target, link));
            } else {
                symlinksCreated.add(new File(link));
            }

        }
    }

    /**
     * Are there resources that should be added to the classpath?
     * Must be called after setup().
     *
     */
    public boolean hasLocalClasspaths() {
        if (!setupCalled) {
            throw new IllegalStateException("hasLocalClasspaths() should be called after setup()");
        }
        return !localClasspaths.isEmpty();
    }

    /**
     * Creates a class loader that includes the designated
     * files and archives.
     */
    public ClassLoader makeClassLoader(final ClassLoader parent) throws MalformedURLException {
        final URL[] urls = new URL[localClasspaths.size()];
        for (int i = 0; i < localClasspaths.size(); ++i) {
            urls[i] = new File(localClasspaths.get(i)).toURI().toURL();
            LOG.info(urls[i]);
        }
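        // Build the class loader in a privileged block so construction succeeds
        // even when a security manager restricts the calling code.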
        return AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
            @Override
            public ClassLoader run() {
                return new URLClassLoader(urls, parent);
            }
        });
    }

    public void close() throws IOException {
        // TODO: cleanup is commented out for now because of interaction with other workers on the same host.
        //        for (File symlink : symlinksCreated) {
        //            if (!symlink.delete()) {
        //                LOG.warn("Failed to delete symlink created by the local job runner: " +
        //                        symlink);
        //            }
        //        }
        //        FileContext localFSFileContext = FileContext.getLocalFSFileContext();
        //        for (String archive : localArchives) {
        //            localFSFileContext.delete(new Path(archive), true);
        //        }
        //        for (String file : localFiles) {
        //            localFSFileContext.delete(new Path(file), true);
        //        }
    }
}
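
Usage example

The class is package-private, so a caller must live in the same package. Below is a minimal, hypothetical sketch; the driver class and the way conf is populated are assumptions for illustration, not part of hServer:

package com.scaleoutsoftware.soss.hserver.hadoop;

import org.apache.hadoop.conf.Configuration;

public class CacheSetupExample {
    public static void main(String[] args) throws Exception {
        // Job configuration, assumed to be populated elsewhere with
        // distributed-cache entries.
        Configuration conf = new Configuration();

        DistributedCacheManager cacheManager = new DistributedCacheManager();
        // Localizes the resources and rewrites conf to point at the local copies.
        cacheManager.setup(conf);

        // If any localized resources belong on the classpath, run task code with
        // a class loader that can see them.
        if (cacheManager.hasLocalClasspaths()) {
            ClassLoader loader = cacheManager.makeClassLoader(
                    Thread.currentThread().getContextClassLoader());
            Thread.currentThread().setContextClassLoader(loader);
        }
    }
}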