/*
 * Copyright 2014-2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.explore.service;

import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.data2.datafabric.dataset.service.DatasetService;
import co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory;
import co.cask.cdap.explore.guice.ExploreRuntimeModule;
import co.cask.cdap.explore.service.hive.Hive12CDH5ExploreService;
import co.cask.cdap.explore.service.hive.Hive12ExploreService;
import co.cask.cdap.explore.service.hive.Hive13ExploreService;
import co.cask.cdap.explore.service.hive.Hive14ExploreService;
import co.cask.cdap.format.RecordFormats;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.util.VersionInfo;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.ClassAcceptor;
import org.apache.twill.internal.utils.Dependencies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Utility class for the explore service.
 */
public class ExploreServiceUtils {
  private static final Logger LOG = LoggerFactory.getLogger(ExploreServiceUtils.class);

  /**
   * Hive support enum.
   */
  public enum HiveSupport {
    // CDH 5.0 to 5.1 uses Hive 0.12
    // CDH >5.1 uses Hive >=0.13.1 (aka 1.0, which Hive14ExploreService supports)
    HIVE_CDH5_0(Pattern.compile("^.*cdh5.0\\..*$"), Hive12CDH5ExploreService.class),
    HIVE_CDH5_1(Pattern.compile("^.*cdh5.1\\..*$"), Hive12CDH5ExploreService.class),
    HIVE_CDH5(Pattern.compile("^.*cdh5\\..*$"), Hive14ExploreService.class),

    HIVE_12(null, Hive12ExploreService.class),
    HIVE_13(null, Hive13ExploreService.class),
    HIVE_14(null, Hive14ExploreService.class),
    HIVE_1_0(null, Hive14ExploreService.class),
    HIVE_1_1(null, Hive14ExploreService.class);

    private final Pattern hadoopVersionPattern;
    private final Class<? extends ExploreService> hiveExploreServiceClass;

    HiveSupport(Pattern hadoopVersionPattern, Class<? extends ExploreService> hiveExploreServiceClass) {
      this.hadoopVersionPattern = hadoopVersionPattern;
      this.hiveExploreServiceClass = hiveExploreServiceClass;
    }

    public Pattern getHadoopVersionPattern() {
      return hadoopVersionPattern;
    }

    public Class<? extends ExploreService> getHiveExploreServiceClass() {
      return hiveExploreServiceClass;
    }
  }
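  /*
   * Illustrative note (not part of the original source): the CDH entries above are matched
   * against the Hadoop version string reported by VersionInfo.getVersion() (see
   * checkHiveSupport below), and values() iterates in declaration order, so the more
   * specific cdh5.0/cdh5.1 patterns are tried before the generic cdh5 one. A minimal
   * sketch, with hypothetical version strings:
   *
   *   Pattern cdh51 = Pattern.compile("^.*cdh5.1\\..*$");
   *   cdh51.matcher("2.3.0-cdh5.1.2").matches();  // true  -> HIVE_CDH5_1
   *   cdh51.matcher("2.5.0-cdh5.4.0").matches();  // false -> falls through to HIVE_CDH5
   */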
  // Caching the dependencies so that we don't trace them twice
  private static Set<File> exploreDependencies = null;

  // Caching the explore class loader
  private static ClassLoader exploreClassLoader = null;

  private static final Pattern HIVE_SITE_FILE_PATTERN = Pattern.compile("^.*/hive-site\\.xml$");
  private static final Pattern YARN_SITE_FILE_PATTERN = Pattern.compile("^.*/yarn-site\\.xml$");
  private static final Pattern MAPRED_SITE_FILE_PATTERN = Pattern.compile("^.*/mapred-site\\.xml$");

  /**
   * Get all the files contained in a class path.
   */
  public static Iterable<File> getClassPathJarsFiles(String hiveClassPath) {
    if (hiveClassPath == null) {
      return null;
    }
    return Iterables.transform(Splitter.on(':').split(hiveClassPath), STRING_FILE_FUNCTION);
  }

  private static final Function<String, File> STRING_FILE_FUNCTION =
    new Function<String, File>() {
      @Override
      public File apply(String input) {
        return new File(input).getAbsoluteFile();
      }
    };

  /**
   * Builds a class loader with the class path provided.
   */
  public static ClassLoader getExploreClassLoader() {
    if (exploreClassLoader != null) {
      return exploreClassLoader;
    }

    // EXPLORE_CLASSPATH and EXPLORE_CONF_FILES will be defined in startup scripts if Hive is installed.
    String exploreClassPathStr = System.getProperty(Constants.Explore.EXPLORE_CLASSPATH);
    LOG.debug("Explore classpath = {}", exploreClassPathStr);
    if (exploreClassPathStr == null) {
      throw new RuntimeException("System property " + Constants.Explore.EXPLORE_CLASSPATH + " is not set.");
    }

    String exploreConfPathStr = System.getProperty(Constants.Explore.EXPLORE_CONF_FILES);
    LOG.debug("Explore confPath = {}", exploreConfPathStr);
    if (exploreConfPathStr == null) {
      throw new RuntimeException("System property " + Constants.Explore.EXPLORE_CONF_FILES + " is not set.");
    }

    Iterable<File> hiveClassPath = getClassPathJarsFiles(exploreClassPathStr);
    Iterable<File> hiveConfFiles = getClassPathJarsFiles(exploreConfPathStr);
    ImmutableList.Builder<URL> builder = ImmutableList.builder();
    for (File file : Iterables.concat(hiveClassPath, hiveConfFiles)) {
      try {
        if (file.getName().matches(".*\\.xml")) {
          builder.add(file.getParentFile().toURI().toURL());
        } else {
          builder.add(file.toURI().toURL());
        }
      } catch (MalformedURLException e) {
        LOG.error("Jar URL is malformed", e);
        throw Throwables.propagate(e);
      }
    }
    exploreClassLoader = new URLClassLoader(Iterables.toArray(builder.build(), URL.class),
                                            ClassLoader.getSystemClassLoader());
    return exploreClassLoader;
  }

  public static Class<? extends ExploreService> getHiveService() {
    HiveSupport hiveVersion = checkHiveSupport(null);
    return hiveVersion.getHiveExploreServiceClass();
  }

  public static HiveSupport checkHiveSupport() {
    return checkHiveSupport(getExploreClassLoader());
  }
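  /*
   * Illustrative sketch (not part of the original source): how the explore class loader is
   * assembled. The classpath string is split on ':' by getClassPathJarsFiles, and for
   * *.xml entries the parent directory is added to the URL list, so that files such as
   * hive-site.xml become visible as classpath resources. The paths and the property value
   * below are hypothetical:
   *
   *   // master started with the explore classpath system property set to
   *   // "/opt/hive/lib/hive-exec.jar:/etc/hive/conf/hive-site.xml"
   *   ClassLoader cl = ExploreServiceUtils.getExploreClassLoader();
   *   // resulting URLs: file:/opt/hive/lib/hive-exec.jar, file:/etc/hive/conf/
   */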
  /**
   * Checks that Hive is in the classpath, with a supported version.
   */
  public static HiveSupport checkHiveSupport(ClassLoader hiveClassLoader) {
    // First, try to figure out which Hive support is relevant based on the Hadoop distribution name
    String hadoopVersion = VersionInfo.getVersion();
    LOG.info("Hadoop version is: {}", hadoopVersion);
    for (HiveSupport hiveSupport : HiveSupport.values()) {
      if (hiveSupport.getHadoopVersionPattern() != null &&
        hiveSupport.getHadoopVersionPattern().matcher(hadoopVersion).matches()) {
        return hiveSupport;
      }
    }

    ClassLoader usingCL = hiveClassLoader;
    if (usingCL == null) {
      usingCL = ExploreServiceUtils.class.getClassLoader();
    }

    try {
      Class<?> hiveVersionInfoClass = usingCL.loadClass("org.apache.hive.common.util.HiveVersionInfo");
      String hiveVersion = (String) hiveVersionInfoClass.getDeclaredMethod("getVersion").invoke(null);
      if (hiveVersion.startsWith("0.12.")) {
        return HiveSupport.HIVE_12;
      } else if (hiveVersion.startsWith("0.13.")) {
        return HiveSupport.HIVE_13;
      } else if (hiveVersion.startsWith("0.14.") || hiveVersion.startsWith("1.0.")) {
        return HiveSupport.HIVE_14;
      } else if (hiveVersion.startsWith("1.1.")) {
        return HiveSupport.HIVE_1_1;
      }
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }

    throw new RuntimeException("Hive distribution not supported. Set the configuration '" +
                               Constants.Explore.EXPLORE_ENABLED +
                               "' to false to start up without Explore.");
  }
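  /*
   * Illustrative sketch (not part of the original source): HiveVersionInfo is loaded
   * reflectively above because the Hive jars live only on the explore class loader, not
   * on the system classpath of the master process. A standalone equivalent, assuming a
   * class loader that can see hive-common:
   *
   *   ClassLoader cl = ExploreServiceUtils.getExploreClassLoader();
   *   Class<?> versionInfo = cl.loadClass("org.apache.hive.common.util.HiveVersionInfo");
   *   String hiveVersion = (String) versionInfo.getDeclaredMethod("getVersion").invoke(null);
   *   // e.g. "0.13.1" maps to HiveSupport.HIVE_13
   */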
  /**
   * Return the list of absolute paths of the bootstrap classes.
   */
  public static Set<String> getBoostrapClasses() {
    ImmutableSet.Builder<String> builder = ImmutableSet.builder();
    for (String classpath : Splitter.on(File.pathSeparatorChar).split(System.getProperty("sun.boot.class.path"))) {
      File file = new File(classpath);
      builder.add(file.getAbsolutePath());
      try {
        builder.add(file.getCanonicalPath());
      } catch (IOException e) {
        LOG.warn("Could not add canonical path to aux class path for file {}", file.toString(), e);
      }
    }
    return builder.build();
  }

  /**
   * Trace the jar dependencies needed by the Explore container. Uses a separate class loader to load Hive classes,
   * built using the explore classpath passed as a system property to master.
   *
   * @return an ordered set of jar files.
   */
  public static Set<File> traceExploreDependencies() throws IOException {
    if (exploreDependencies != null) {
      return exploreDependencies;
    }

    ClassLoader classLoader = getExploreClassLoader();
    return traceExploreDependencies(classLoader);
  }

  /**
   * Trace the jar dependencies needed by the Explore container.
   *
   * @param classLoader class loader to use to trace the dependencies.
   *                    If it is null, the class loader of this class is used.
   * @return an ordered set of jar files.
   */
  public static Set<File> traceExploreDependencies(ClassLoader classLoader) throws IOException {
    if (exploreDependencies != null) {
      return exploreDependencies;
    }

    ClassLoader usingCL = classLoader;
    if (classLoader == null) {
      usingCL = ExploreRuntimeModule.class.getClassLoader();
    }
    final Set<String> bootstrapClassPaths = getBoostrapClasses();

    ClassAcceptor classAcceptor = new ClassAcceptor() {
      /* Exclude any class contained in the bootstrapClassPaths, as well as the Kryo classes.
       * The Kryo dependency must be removed from the Explore container: Spark introduces Kryo 2.21,
       * which would normally be shipped to the Explore container. Yet Hive requires Kryo 2.22,
       * and gets it from the Hive jars (hive-exec.jar to be precise).
       */
      @Override
      public boolean accept(String className, URL classUrl, URL classPathUrl) {
        if (bootstrapClassPaths.contains(classPathUrl.getFile()) ||
          className.startsWith("com.esotericsoftware.kryo")) {
          return false;
        }
        return true;
      }
    };

    Set<File> hBaseTableDeps = traceDependencies(HBaseTableUtilFactory.getHBaseTableUtilClass().getName(),
                                                 usingCL, classAcceptor);

    // Note that the order of the dependency jars is important, so that the HBase jars come first
    // in the classpath order. LinkedHashSet maintains insertion order while removing duplicate entries.
    Set<File> orderedDependencies = new LinkedHashSet<>();
    orderedDependencies.addAll(hBaseTableDeps);
    orderedDependencies.addAll(traceDependencies(DatasetService.class.getName(), usingCL, classAcceptor));
    orderedDependencies.addAll(
      traceDependencies("co.cask.cdap.hive.datasets.DatasetStorageHandler", usingCL, classAcceptor));
    orderedDependencies.addAll(
      traceDependencies("co.cask.cdap.hive.datasets.StreamStorageHandler", usingCL, classAcceptor));
    orderedDependencies.addAll(
      traceDependencies("org.apache.hadoop.hive.ql.exec.mr.ExecDriver", usingCL, classAcceptor));
    orderedDependencies.addAll(
      traceDependencies("org.apache.hive.service.cli.CLIService", usingCL, classAcceptor));
    orderedDependencies.addAll(
      traceDependencies("org.apache.hadoop.mapred.YarnClientProtocolProvider", usingCL, classAcceptor));
    orderedDependencies.addAll(traceDependencies(RecordFormats.class.getName(), usingCL, classAcceptor));

    // Needed for (at least) CDH 4.4 integration
    orderedDependencies.addAll(
      traceDependencies("org.apache.hive.builtins.BuiltinUtils", usingCL, classAcceptor));

    // Needed for (at least) CDH 5 integration
    orderedDependencies.addAll(
      traceDependencies("org.apache.hadoop.hive.shims.Hadoop23Shims", usingCL, classAcceptor));

    exploreDependencies = orderedDependencies;
    return orderedDependencies;
  }

  /**
   * Trace the dependency files of the given className, using the given classLoader,
   * and including only the classes accepted by the classAcceptor.
   *
   * Nothing is returned if the classLoader does not contain the className.
   */
  public static Set<File> traceDependencies(String className, ClassLoader classLoader,
                                            final ClassAcceptor classAcceptor) throws IOException {
    ClassLoader usingCL = classLoader;
    if (usingCL == null) {
      usingCL = ExploreRuntimeModule.class.getClassLoader();
    }
    final Set<File> jarFiles = Sets.newHashSet();

    Dependencies.findClassDependencies(
      usingCL,
      new ClassAcceptor() {
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
          if (!classAcceptor.accept(className, classUrl, classPathUrl)) {
            return false;
          }
          jarFiles.add(new File(classPathUrl.getFile()));
          return true;
        }
      },
      className);

    return jarFiles;
  }
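  /*
   * Illustrative sketch (not part of the original source): tracing the jars that a single
   * class pulls in, with a permissive acceptor. Twill's ClassAcceptor accepts every class
   * by default, so the base instance can be used as-is:
   *
   *   Set<File> jars = ExploreServiceUtils.traceDependencies(
   *     "org.apache.hive.service.cli.CLIService", null, new ClassAcceptor());
   *   // jars now contains the classpath entries that CLIService transitively references
   */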
  /**
   * Updates environment variables in hive-site.xml, mapred-site.xml and yarn-site.xml for explore.
   * All other conf files are returned without any update.
   *
   * @param confFile conf file to update
   * @param tempDir temp dir in which to create files if necessary
   * @return the new conf file to use in place of confFile
   */
  public static File updateConfFileForExplore(File confFile, File tempDir) {
    if (HIVE_SITE_FILE_PATTERN.matcher(confFile.getAbsolutePath()).matches()) {
      return updateHiveConfFile(confFile, tempDir);
    } else if (YARN_SITE_FILE_PATTERN.matcher(confFile.getAbsolutePath()).matches()) {
      return updateYarnConfFile(confFile, tempDir);
    } else if (MAPRED_SITE_FILE_PATTERN.matcher(confFile.getAbsolutePath()).matches()) {
      return updateMapredConfFile(confFile, tempDir);
    } else {
      return confFile;
    }
  }

  /**
   * Change the yarn-site.xml file, and return a temp copy of it to which the
   * necessary options have been added.
   */
  private static File updateYarnConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
      conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
      LOG.error("File {} is malformed.", confFile, e);
      throw Throwables.propagate(e);
    }

    String yarnAppClassPath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                                       Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));

    // Add $PWD/* at the beginning of the classpath so that the user's jars take precedence.
    // Without this change, job.jar would be at the beginning of the classpath; since job.jar
    // contains classes from an old Guava version, $PWD/* must come before it.
    yarnAppClassPath = "$PWD/*," + yarnAppClassPath;

    conf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, yarnAppClassPath);

    File newYarnConfFile = new File(tempDir, "yarn-site.xml");
    try (FileOutputStream os = new FileOutputStream(newYarnConfFile)) {
      conf.writeXml(os);
    } catch (IOException e) {
      LOG.error("Problem creating and writing to temporary yarn-site.xml conf file at {}", newYarnConfFile, e);
      throw Throwables.propagate(e);
    }

    return newYarnConfFile;
  }

  /**
   * Change the mapred-site.xml file, and return a temp copy of it to which the
   * necessary options have been added.
   */
  private static File updateMapredConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
      conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
      LOG.error("File {} is malformed.", confFile, e);
      throw Throwables.propagate(e);
    }

    String mrAppClassPath = conf.get(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
                                     MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH);

    // Add $PWD/* at the beginning of the classpath. Without this change, old jars from the
    // MR framework classpath would come first in the classpath.
    mrAppClassPath = "$PWD/*," + mrAppClassPath;

    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, mrAppClassPath);

    File newMapredConfFile = new File(tempDir, "mapred-site.xml");
    try (FileOutputStream os = new FileOutputStream(newMapredConfFile)) {
      conf.writeXml(os);
    } catch (IOException e) {
      LOG.error("Problem creating and writing to temporary mapred-site.xml conf file at {}", newMapredConfFile, e);
      throw Throwables.propagate(e);
    }

    return newMapredConfFile;
  }
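  /*
   * Illustrative example (not part of the original source) of the effect of the two
   * methods above: for a hypothetical yarn-site.xml entry
   *
   *   yarn.application.classpath = $HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/lib/ALL
   *
   * the temp copy written to tempDir carries
   *
   *   yarn.application.classpath = $PWD/ALL,$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/lib/ALL
   *
   * (ALL stands for the literal wildcard character, which cannot appear inside this
   * comment), so that jars localized into the container's working directory win over the
   * framework-provided ones. mapred-site.xml is rewritten the same way.
   */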
  /**
   * Change the hive-site.xml file, and return a temp copy of it to which the
   * necessary options have been added.
   */
  private static File updateHiveConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
      conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
      LOG.error("File {} is malformed.", confFile, e);
      throw Throwables.propagate(e);
    }

    // We prefer jars at the container's root directory over job.jar. The YARN_APPLICATION_CLASSPATH
    // in yarn-site.xml is edited by co.cask.cdap.explore.service.ExploreServiceUtils.updateYarnConfFile,
    // and setting MAPREDUCE_JOB_CLASSLOADER and MAPREDUCE_JOB_USER_CLASSPATH_FIRST to false puts
    // the YARN_APPLICATION_CLASSPATH before job.jar on the container's classpath.
    conf.setBoolean(Job.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false);

    File newHiveConfFile = new File(tempDir, "hive-site.xml");
    try (FileOutputStream os = new FileOutputStream(newHiveConfFile)) {
      conf.writeXml(os);
    } catch (IOException e) {
      LOG.error("Problem creating temporary hive-site.xml conf file at {}", newHiveConfFile, e);
      throw Throwables.propagate(e);
    }
    return newHiveConfFile;
  }
}
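/*
 * End-to-end sketch (illustrative only, not part of the original source): how a caller
 * might combine the utilities in this class when preparing an Explore container. The
 * tempDir handling and the shipping steps are hypothetical:
 *
 *   File tempDir = com.google.common.io.Files.createTempDir();
 *   String confFiles = System.getProperty(Constants.Explore.EXPLORE_CONF_FILES);
 *   for (File conf : ExploreServiceUtils.getClassPathJarsFiles(confFiles)) {
 *     File toShip = ExploreServiceUtils.updateConfFileForExplore(conf, tempDir);
 *     // ... add toShip to the container's local resources
 *   }
 *   for (File jar : ExploreServiceUtils.traceExploreDependencies()) {
 *     // ... add jar to the container's classpath
 *   }
 */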