org.apache.hadoop.sqoop.orm.CompilationManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.sqoop.orm.CompilationManager.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.sqoop.orm;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.sqoop.ImportOptions;
import org.apache.hadoop.sqoop.util.FileListing;

/**
 * Manages the compilation of a bunch of .java files into .class files
 * and eventually a jar.
 *
 * Also embeds this program's jar into the lib/ directory inside the compiled jar
 * to ensure that the job runs correctly.
 *
 * 
 *
 */
public class CompilationManager {

    public static final Log LOG = LogFactory.getLog(CompilationManager.class.getName());

    private ImportOptions options;
    private List<String> sources;

    public CompilationManager(final ImportOptions opts) {
        options = opts;
        sources = new ArrayList<String>();
    }

    public void addSourceFile(String sourceName) {
        sources.add(sourceName);
    }

    /**
     * locate the hadoop-*-core.jar in $HADOOP_HOME or --hadoop-home.
     * If that doesn't work, check our classpath.
     * @return the filename of the hadoop-*-core.jar file.
     */
    private String findHadoopCoreJar() {
        String hadoopHome = options.getHadoopHome();

        if (null == hadoopHome) {
            LOG.info("$HADOOP_HOME is not set");
            return findJarForClass(JobConf.class);
        }

        if (!hadoopHome.endsWith(File.separator)) {
            hadoopHome = hadoopHome + File.separator;
        }

        File hadoopHomeFile = new File(hadoopHome);
        LOG.info("HADOOP_HOME is " + hadoopHomeFile.getAbsolutePath());
        File[] entries = hadoopHomeFile.listFiles();

        if (null == entries) {
            LOG.warn("HADOOP_HOME appears empty or missing");
            return findJarForClass(JobConf.class);
        }

        for (File f : entries) {
            if (f.getName().startsWith("hadoop-") && f.getName().endsWith("-core.jar")) {
                LOG.info("Found hadoop core jar at: " + f.getAbsolutePath());
                return f.getAbsolutePath();
            }
        }

        return findJarForClass(JobConf.class);
    }

    /**
     * Compile the .java files into .class files via embedded javac call.
     */
    public void compile() throws IOException {
        List<String> args = new ArrayList<String>();

        // ensure that the jar output dir exists.
        String jarOutDir = options.getJarOutputDir();
        boolean mkdirSuccess = new File(jarOutDir).mkdirs();
        if (!mkdirSuccess) {
            LOG.debug("Warning: Could not make directories for " + jarOutDir);
        }

        // find hadoop-*-core.jar for classpath.
        String coreJar = findHadoopCoreJar();
        if (null == coreJar) {
            // Couldn't find a core jar to insert into the CP for compilation.
            // If, however, we're running this from a unit test, then the path
            // to the .class files might be set via the hadoop.alt.classpath property
            // instead. Check there first.
            String coreClassesPath = System.getProperty("hadoop.alt.classpath");
            if (null == coreClassesPath) {
                // no -- we're out of options. Fail.
                throw new IOException("Could not find hadoop core jar!");
            } else {
                coreJar = coreClassesPath;
            }
        }

        // find sqoop jar for compilation classpath
        String sqoopJar = findThisJar();
        if (null != sqoopJar) {
            sqoopJar = File.pathSeparator + sqoopJar;
        } else {
            LOG.warn("Could not find sqoop jar; child compilation may fail");
            sqoopJar = "";
        }

        String curClasspath = System.getProperty("java.class.path");

        args.add("-sourcepath");
        String srcOutDir = options.getCodeOutputDir();
        args.add(srcOutDir);

        args.add("-d");
        args.add(jarOutDir);

        args.add("-classpath");
        args.add(curClasspath + File.pathSeparator + coreJar + sqoopJar);

        // add all the source files
        for (String srcfile : sources) {
            args.add(srcOutDir + srcfile);
        }

        StringBuilder sb = new StringBuilder();
        for (String arg : args) {
            sb.append(arg + " ");
        }

        // NOTE(aaron): Usage is at http://java.sun.com/j2se/1.5.0/docs/tooldocs/solaris/javac.html
        LOG.info("Invoking javac with args: " + sb.toString());
        int javacRet = com.sun.tools.javac.Main.compile(args.toArray(new String[0]));
        if (javacRet != 0) {
            throw new IOException("javac exited with status " + javacRet);
        }
    }

    public String getJarFilename() {
        String jarOutDir = options.getJarOutputDir();
        String tableName = options.getTableName();
        if (null != tableName && tableName.length() > 0) {
            return jarOutDir + tableName + ".jar";
        } else if (this.sources.size() == 1) {
            // if we only have one source file, find it's base name,
            // turn "foo.java" into "foo", and then return jarDir + "foo" + ".jar"
            String srcFileName = this.sources.get(0);
            String basename = new File(srcFileName).getName();
            String[] parts = basename.split("\\.");
            String preExtPart = parts[0];
            return jarOutDir + preExtPart + ".jar";
        } else {
            return jarOutDir + "sqoop.jar";
        }
    }

    /**
     * Create an output jar file to use when executing MapReduce jobs
     */
    public void jar() throws IOException {
        String jarOutDir = options.getJarOutputDir();
        List<File> outDirEntries = FileListing.getFileListing(new File(jarOutDir));

        String jarFilename = getJarFilename();

        LOG.info("Writing jar file: " + jarFilename);

        findThisJar();
        File jarFileObj = new File(jarFilename);
        if (jarFileObj.exists()) {
            if (!jarFileObj.delete()) {
                LOG.warn("Could not remove existing jar file: " + jarFilename);
            }
        }

        FileOutputStream fstream = null;
        JarOutputStream jstream = null;
        try {
            fstream = new FileOutputStream(jarFilename);
            jstream = new JarOutputStream(fstream);

            // for each input class file, create a zipfile entry for it,
            // read the file into a buffer, and write it to the jar file.

            for (File entry : outDirEntries) {
                if (entry.equals(jarFileObj)) {
                    // don't include our own jar!
                    continue;
                } else if (entry.isDirectory()) {
                    // don't write entries for directories
                    continue;
                } else {
                    String fileName = entry.getName();

                    boolean include = fileName.endsWith(".class") && sources
                            .contains(fileName.substring(0, fileName.length() - ".class".length()) + ".java");

                    if (include) {
                        // include this file.

                        // chomp off the portion of the full path that is shared
                        // with the base directory where class files were put;
                        // we only record the subdir parts in the zip entry.
                        String fullPath = entry.getAbsolutePath();
                        String chompedPath = fullPath.substring(jarOutDir.length());

                        LOG.debug("Got classfile: " + entry.getPath() + " -> " + chompedPath);
                        ZipEntry ze = new ZipEntry(chompedPath);
                        jstream.putNextEntry(ze);
                        copyFileToStream(entry, jstream);
                        jstream.closeEntry();
                    }
                }
            }

            // put our own jar in there in its lib/ subdir
            String thisJarFile = findThisJar();
            if (null != thisJarFile) {
                File thisJarFileObj = new File(thisJarFile);
                String thisJarBasename = thisJarFileObj.getName();
                String thisJarEntryName = "lib" + File.separator + thisJarBasename;
                ZipEntry ze = new ZipEntry(thisJarEntryName);
                jstream.putNextEntry(ze);
                copyFileToStream(thisJarFileObj, jstream);
                jstream.closeEntry();
            } else {
                // couldn't find our own jar (we were running from .class files?)
                LOG.warn("Could not find jar for Sqoop; MapReduce jobs may not run correctly.");
            }
        } finally {
            IOUtils.closeStream(jstream);
            IOUtils.closeStream(fstream);
        }
    }

    private static final int BUFFER_SZ = 4096;

    /**
     * utility method to copy a .class file into the jar stream.
     * @param f
     * @param ostream
     * @throws IOException
     */
    private void copyFileToStream(File f, OutputStream ostream) throws IOException {
        FileInputStream fis = new FileInputStream(f);
        byte[] buffer = new byte[BUFFER_SZ];
        try {
            while (true) {
                int bytesReceived = fis.read(buffer);
                if (bytesReceived < 1) {
                    break;
                }

                ostream.write(buffer, 0, bytesReceived);
            }
        } finally {
            fis.close();
        }
    }

    private String findThisJar() {
        return findJarForClass(CompilationManager.class);
    }

    // method mostly cloned from o.a.h.mapred.JobConf.findContainingJar()
    private String findJarForClass(Class<? extends Object> classObj) {
        ClassLoader loader = classObj.getClassLoader();
        String classFile = classObj.getName().replaceAll("\\.", "/") + ".class";
        try {
            for (Enumeration<URL> itr = loader.getResources(classFile); itr.hasMoreElements();) {
                URL url = (URL) itr.nextElement();
                if ("jar".equals(url.getProtocol())) {
                    String toReturn = url.getPath();
                    if (toReturn.startsWith("file:")) {
                        toReturn = toReturn.substring("file:".length());
                    }
                    toReturn = URLDecoder.decode(toReturn, "UTF-8");
                    return toReturn.replaceAll("!.*$", "");
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return null;
    }
}