Source code for org.apache.hive.hcatalog.templeton.tool.TempletonUtils.java
(general utility methods for the Templeton/WebHCat server, Apache Hive HCatalog).

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton.tool;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.ws.rs.core.UriBuilder;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.hcatalog.templeton.UgiFactory;
import org.apache.hive.hcatalog.templeton.BadParam;

/**
 * General utility methods.
 */
/**
 * General utility methods used by the Templeton (WebHCat) server and its
 * job-launcher tools: emptiness checks, job-progress parsing, string
 * array encoding, HDFS path resolution, and environment/debug helpers.
 * All methods are static and stateless.
 */
public class TempletonUtils {
    private static final Log LOG = LogFactory.getLog(TempletonUtils.class);

    /**
     * Is the string non-null and non-empty?
     */
    public static boolean isset(String s) {
        return (s != null) && (s.length() > 0);
    }

    /**
     * Is the character set (non-zero)?
     */
    public static boolean isset(char ch) {
        return (ch != 0);
    }

    /**
     * Is the array non-null and non-empty?
     */
    public static <T> boolean isset(T[] a) {
        return (a != null) && (a.length > 0);
    }

    /**
     * Is the collection non-null and non-empty?
     */
    public static <T> boolean isset(Collection<T> col) {
        return (col != null) && (!col.isEmpty());
    }

    /**
     * Is the map non-null and non-empty?
     */
    public static <K, V> boolean isset(Map<K, V> col) {
        return (col != null) && (!col.isEmpty());
    }

    //looking for map 100% reduce 100%
    public static final Pattern JAR_COMPLETE = Pattern.compile(" map \\d+%\\s+reduce \\d+%$");
    public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$");
    //looking for map = 100%,  reduce = 100%
    public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$");
    /**
     * Hive on Tez produces progress report that looks like this
     * Map 1: -/-   Reducer 2: 0/1
     * Map 1: -/-   Reducer 2: 0(+1)/1
     * Map 1: -/-   Reducer 2: 1/1
     *
     * -/- means there are no tasks (yet)
     * 0/1 means 1 total tasks, 0 completed
     * 1(+2)/3 means 3 total, 1 completed and 2 running
     *
     * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png
     * has more examples.
     * To report progress, we'll assume all tasks are equal size and compute "completed" as percent of "total"
     * "(Map|Reducer) (\\d+:) ((-/-)|(\\d+(\\(\\+\\d+\\))?/\\d+))" is the complete pattern but we'll drop "-/-" to exclude
     * groups that don't add information such as "Map 1: -/-"
     */
    public static final Pattern TEZ_COMPLETE = Pattern.compile("(Map|Reducer) (\\d+:) (\\d+(\\(\\+\\d+\\))?/\\d+)");
    public static final Pattern TEZ_COUNTERS = Pattern.compile("\\d+");

    /**
     * Extract the percent-complete line from Pig, Jar, Hive-on-MR or
     * Hive-on-Tez job output.
     *
     * @param line one line of child-job output
     * @return a short human-readable progress string, or null if the line
     *         carries no recognizable progress information
     */
    public static String extractPercentComplete(String line) {
        Matcher jar = JAR_COMPLETE.matcher(line);
        if (jar.find())
            return jar.group().trim();

        Matcher pig = PIG_COMPLETE.matcher(line);
        if (pig.find())
            return pig.group().trim();

        Matcher hive = HIVE_COMPLETE.matcher(line);
        if (hive.find()) {
            return "map " + hive.group(1) + " reduce " + hive.group(2);
        }
        Matcher tez = TEZ_COMPLETE.matcher(line);
        if (tez.find()) {
            int totalTasks = 0;
            int completedTasks = 0;
            do {
                //here each group looks something like "Map 2: 2/4" "Reducer 3: 1(+2)/4"
                //just parse the numbers and ignore one from "Map 2" and from "(+2)" if it's there
                Matcher counts = TEZ_COUNTERS.matcher(tez.group());
                List<String> items = new ArrayList<String>(4);
                while (counts.find()) {
                    items.add(counts.group());
                }
                // items = [vertexNumber, completed, (running,) total]
                completedTasks += Integer.parseInt(items.get(1));
                if (items.size() == 3) {
                    totalTasks += Integer.parseInt(items.get(2));
                } else {
                    totalTasks += Integer.parseInt(items.get(3));
                }
            } while (tez.find());
            if (totalTasks == 0) {
                return "0% complete (0 total tasks)";
            }
            return completedTasks * 100 / totalTasks + "% complete";
        }
        return null;
    }

    public static final Pattern JAR_ID = Pattern.compile(" Running job: (\\S+)$");
    public static final Pattern PIG_ID = Pattern.compile(" HadoopJobId: (\\S+)$");
    public static final Pattern[] ID_PATTERNS = { JAR_ID, PIG_ID };

    /**
     * Extract the child job id from jar or pig job output.
     *
     * @param line one line of child-job output
     * @return the job id, or null if the line contains none
     */
    public static String extractChildJobId(String line) {
        for (Pattern p : ID_PATTERNS) {
            Matcher m = p.matcher(line);
            if (m.find())
                return m.group(1);
        }

        return null;
    }

    /**
     * Take an array of strings and encode it into one string.
     * Null elements are encoded as empty strings.  The input array is
     * not modified.
     *
     * @param plain the strings to encode, may be null
     * @return the encoded string, or null if {@code plain} is null
     */
    public static String encodeArray(String[] plain) {
        if (plain == null)
            return null;

        String[] escaped = new String[plain.length];

        for (int i = 0; i < plain.length; ++i) {
            // treat null entries as "" without mutating the caller's array
            String element = (plain[i] == null) ? "" : plain[i];
            escaped[i] = StringUtils.escapeString(element);
        }

        return StringUtils.arrayToString(escaped);
    }

    /**
     * Encode a List into a string.
     *
     * @param list the strings to encode, may be null
     * @return the encoded string, or null if {@code list} is null
     */
    public static String encodeArray(List<String> list) {
        if (list == null)
            return null;
        String[] array = new String[list.size()];
        return encodeArray(list.toArray(array));
    }

    /**
     * Take an encoded string and decode it into an array of strings.
     *
     * @param s an encoded string produced by {@link #encodeArray(String[])}
     * @return the decoded strings, or null if {@code s} is null
     */
    public static String[] decodeArray(String s) {
        if (s == null)
            return null;

        String[] escaped = StringUtils.split(s);
        String[] plain = new String[escaped.length];

        for (int i = 0; i < escaped.length; ++i)
            plain[i] = StringUtils.unEscapeString(escaped[i]);

        return plain;
    }

    /**
     * Resolve a comma-separated list of file names against HDFS as
     * {@code user} and return the fully-qualified paths.
     *
     * @return the qualified paths, or null if {@code files} or {@code conf} is null
     */
    public static String[] hadoopFsListAsArray(String files, Configuration conf, String user)
            throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
        if (files == null || conf == null) {
            return null;
        }
        String[] dirty = files.split(",");
        String[] clean = new String[dirty.length];

        for (int i = 0; i < dirty.length; ++i)
            clean[i] = hadoopFsFilename(dirty[i], conf, user);

        return clean;
    }

    /**
     * Like {@link #hadoopFsListAsArray} but joins the result back into
     * a single encoded string.
     */
    public static String hadoopFsListAsString(String files, Configuration conf, String user)
            throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
        if (files == null || conf == null) {
            return null;
        }
        return StringUtils.arrayToString(hadoopFsListAsArray(files, conf, user));
    }

    /**
     * Resolve a single file name against HDFS as {@code user}.
     *
     * @return the fully-qualified path as a string, or null if unresolvable
     */
    public static String hadoopFsFilename(String fname, Configuration conf, String user)
            throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
        Path p = hadoopFsPath(fname, conf, user);
        if (p == null)
            return null;
        else
            return p.toString();
    }

    /**
     * Returns all files (non-recursive) in {@code dirName}
     */
    public static List<Path> hadoopFsListChildren(String dirName, Configuration conf, String user)
            throws URISyntaxException, IOException, InterruptedException {

        Path p = hadoopFsPath(dirName, conf, user);
        FileSystem fs = p.getFileSystem(conf);
        if (!fs.exists(p)) {
            return Collections.emptyList();
        }
        List<FileStatus> children = ShimLoader.getHadoopShims().listLocatedStatus(fs, p, null);
        if (!isset(children)) {
            return Collections.emptyList();
        }
        List<Path> files = new ArrayList<Path>();
        for (FileStatus stat : children) {
            files.add(stat.getPath());
        }
        return files;
    }

    /**
     * @return true iff we are sure the file is not there.
     */
    public static boolean hadoopFsIsMissing(FileSystem fs, Path p) {
        try {
            return !fs.exists(p);
        } catch (Throwable t) {
            // Got an error, might be there anyway due to a
            // permissions problem.
            return false;
        }
    }

    /**
     * Prefix a relative or empty path with the user's home directory
     * ({@code /user/<user>}); absolute paths are returned unchanged.
     */
    public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user)
            throws IOException, URISyntaxException {
        URI uri = new URI(origPathStr);

        if (uri.getPath().isEmpty()) {
            String newPath = "/user/" + user;
            uri = UriBuilder.fromUri(uri).replacePath(newPath).build();
        } else if (!new Path(uri.getPath()).isAbsolute()) {
            String newPath = "/user/" + user + "/" + uri.getPath();
            uri = UriBuilder.fromUri(uri).replacePath(newPath).build();
        } // no work needed for absolute paths

        return uri.toString();
    }

    /**
     * Resolve {@code fname} to a fully-qualified {@link Path}, accessing
     * the file system as {@code user} (or the login user if null).
     *
     * @throws FileNotFoundException if the resolved path does not exist
     */
    public static Path hadoopFsPath(String fname, final Configuration conf, String user)
            throws URISyntaxException, IOException, InterruptedException {
        if (fname == null || conf == null) {
            return null;
        }

        UserGroupInformation ugi;
        if (user != null) {
            ugi = UgiFactory.getUgi(user);
        } else {
            ugi = UserGroupInformation.getLoginUser();
        }
        // fname is reassigned below, so capture the current value in a
        // final local for the anonymous class (no defensive copy needed;
        // String is immutable)
        final String finalFName = fname;

        final FileSystem defaultFs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            public FileSystem run() throws URISyntaxException, IOException, InterruptedException {
                return FileSystem.get(new URI(finalFName), conf);
            }
        });

        fname = addUserHomeDirectoryIfApplicable(fname, user);
        URI u = new URI(fname);
        Path p = new Path(u).makeQualified(defaultFs);

        if (hadoopFsIsMissing(defaultFs, p))
            throw new FileNotFoundException("File " + fname + " does not exist.");

        return p;
    }

    /**
     * GET the given url, discarding the body.  Returns the number of
     * bytes received.
     *
     * @throws IOException if the connection or a read fails
     */
    public static int fetchUrl(URL url) throws IOException {
        URLConnection cnx = url.openConnection();
        InputStream in = cnx.getInputStream();
        try {
            byte[] buf = new byte[8192];
            int total = 0;
            int len = 0;
            while ((len = in.read(buf)) >= 0)
                total += len;

            return total;
        } finally {
            in.close(); // previously leaked; always release the stream
        }
    }

    /**
     * Set the environment variables to specify the hadoop user.
     *
     * @param user the user to run hadoop commands as
     * @param overrideClasspath extra classpath entries to prepend, or null
     * @return a map of environment variables to set on the child process
     */
    public static Map<String, String> hadoopUserEnv(String user, String overrideClasspath) {
        HashMap<String, String> env = new HashMap<String, String>();
        env.put("HADOOP_USER_NAME", user);

        if (overrideClasspath != null) {
            env.put("HADOOP_USER_CLASSPATH_FIRST", "true");
            String cur = System.getenv("HADOOP_CLASSPATH");
            if (TempletonUtils.isset(cur))
                // use the platform separator (';' on Windows, ':' elsewhere);
                // a hard-coded ':' broke the classpath on Windows
                overrideClasspath = overrideClasspath + File.pathSeparator + cur;
            env.put("HADOOP_CLASSPATH", overrideClasspath);
        }

        return env;
    }

    // Add double quotes around the given input parameter if it is not already
    // quoted. Quotes are not allowed in the middle of the parameter, and
    // BadParam exception is thrown if this is the case.
    //
    // This method should be used to escape parameters before they get passed to
    // Windows cmd scripts (specifically, special characters like a comma or an
    // equal sign might be lost as part of the cmd script processing if not
    // under quotes).
    public static String quoteForWindows(String param) throws BadParam {
        if (Shell.WINDOWS) {
            if (param != null && param.length() > 0) {
                String nonQuotedPart = param;
                boolean addQuotes = true;
                if (param.charAt(0) == '\"' && param.charAt(param.length() - 1) == '\"') {
                    // a lone '"' matches both charAt checks; reject it here
                    if (param.length() < 2)
                        throw new BadParam("Passed in parameter is incorrectly quoted: " + param);

                    addQuotes = false;
                    nonQuotedPart = param.substring(1, param.length() - 1);
                }

                // If we have any quotes other then the outside quotes, throw
                if (nonQuotedPart.contains("\"")) {
                    throw new BadParam("Passed in parameter is incorrectly quoted: " + param);
                }

                if (addQuotes) {
                    param = '\"' + param + '\"';
                }
            }
        }
        return param;
    }

    /**
     * On Windows, prefix the argument list with "cmd /c call" so the
     * target script runs under the command interpreter; no-op elsewhere.
     */
    public static void addCmdForWindows(ArrayList<String> args) {
        if (Shell.WINDOWS) {
            args.add("cmd");
            args.add("/c");
            args.add("call");
        }
    }

    /**
     * replaces all occurrences of "\," with ","; returns {@code s} if no modifications needed
     */
    public static String unEscapeString(String s) {
        return s != null && s.contains("\\,") ? StringUtils.unEscapeString(s) : s;
    }

    /**
     * Find a jar that contains a class of the same name and which
     * file name matches the given pattern.
     *
     * @param clazz the class to find.
     * @param fileNamePattern regex pattern that must match the jar full path
     * @return a jar file that contains the class, or null
     */
    public static String findContainingJar(Class<?> clazz, String fileNamePattern) {
        ClassLoader loader = clazz.getClassLoader();
        String classFile = clazz.getName().replaceAll("\\.", "/") + ".class";
        try {
            for (final Enumeration<URL> itr = loader.getResources(classFile); itr.hasMoreElements();) {
                final URL url = itr.nextElement();
                if ("jar".equals(url.getProtocol())) {
                    String toReturn = url.getPath();
                    if (fileNamePattern == null || toReturn.matches(fileNamePattern)) {
                        toReturn = URLDecoder.decode(toReturn, "UTF-8");
                        // strip the "!/path/inside/jar" suffix of a jar: URL
                        return toReturn.replaceAll("!.*$", "");
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return null;
    }

    /**
     * Dump a Properties object as a sorted, human-readable block of text.
     */
    public static StringBuilder dumpPropMap(String header, Properties props) {
        Map<String, String> map = new HashMap<String, String>();
        for (Map.Entry<Object, Object> ent : props.entrySet()) {
            map.put(ent.getKey().toString(), ent.getValue() == null ? null : ent.getValue().toString());
        }
        return dumpPropMap(header, map);
    }

    /**
     * Dump a string map as a sorted, human-readable block of text.
     * Values of keys containing "path" that hold multiple path entries
     * are split one entry per line for readability.
     */
    public static StringBuilder dumpPropMap(String header, Map<String, String> map) {
        StringBuilder sb = new StringBuilder("START").append(header).append(":\n");
        List<String> propKeys = new ArrayList<String>(map.keySet());
        Collections.sort(propKeys);
        for (String propKey : propKeys) {
            String value = map.get(propKey);
            // null guard: StringTokenizer would NPE on a null value
            if (value != null && propKey.toLowerCase().contains("path")) {
                StringTokenizer st = new StringTokenizer(value, File.pathSeparator);
                if (st.countTokens() > 1) {
                    sb.append(propKey).append("=\n");
                    while (st.hasMoreTokens()) {
                        sb.append("    ").append(st.nextToken()).append(File.pathSeparator).append('\n');
                    }
                    continue;
                }
            }
            sb.append(propKey).append('=').append(value).append('\n');
        }
        return sb.append("END").append(header).append('\n');
    }
}