Java tutorial: the MarkLogic ContentPump (mlcp) command-line entry point (ContentPump.java)
/*
 * Copyright 2003-2016 MarkLogic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.marklogic.contentpump;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.VersionInfo;

import com.marklogic.contentpump.utilities.AuditUtil;
import com.marklogic.contentpump.utilities.CommandlineOptions;
import com.marklogic.contentpump.utilities.OptionsFileUtil;
import com.marklogic.mapreduce.MarkLogicConstants;

/**
 * ContentPump entry point. MarkLogic ContentPump is a tool that moves content
 * between a MarkLogic database and file system or copies content from one
 * MarkLogic database to another.
 *
 * @author jchen
 */
public class ContentPump implements MarkLogicConstants, ConfigConstants {

    public static final Log LOG = LogFactory.getLog(ContentPump.class);

    public static void main(String[] args) throws Exception {
        if (args.length == 0) {
            printUsage();
            System.exit(1);
        }

        String[] expandedArgs = null;
        int rc = 1;
        try {
            expandedArgs = OptionsFileUtil.expandArguments(args);
            rc = runCommand(expandedArgs);
        } catch (Exception ex) {
            LOG.error("Error while expanding arguments", ex);
            System.err.println(ex.getMessage());
            System.err.println("Try 'mlcp help' for usage.");
        }

        System.exit(rc);
    }

    public static int runCommand(String[] args) throws IOException {
        // get command
        String cmd = args[0];
        if (cmd.equalsIgnoreCase("help")) {
            printUsage();
            return 1;
        } else if (cmd.equalsIgnoreCase("version")) {
            logVersions();
            return 1;
        }

        Command command = Command.forName(cmd);

        // get options arguments
        String[] optionArgs = Arrays.copyOfRange(args, 1, args.length);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Command: " + command);
            StringBuilder buf = new StringBuilder();
            for (String arg : optionArgs) {
                buf.append(arg);
                buf.append(' ');
            }
            LOG.debug("Arguments: " + buf);
        }

        // parse hadoop specific options
        Configuration conf = new Configuration();
        GenericOptionsParser genericParser = new GenericOptionsParser(
            conf, optionArgs);
        String[] remainingArgs = genericParser.getRemainingArgs();

        // parse command specific options
        CommandlineOptions options = new CommandlineOptions();
        command.configOptions(options);
        CommandLineParser parser = new GnuParser();
        CommandLine cmdline;
        try {
            cmdline = parser.parse(options, remainingArgs);
        } catch (Exception e) {
            LOG.error("Error parsing command arguments: ");
            LOG.error(e.getMessage());
            // Print the command usage message and exit.
            command.printUsage(command, options.getPublicOptions());
            return 1; // Exit on exception here.
        }

        for (String arg : cmdline.getArgs()) {
            LOG.error("Unrecognized argument: " + arg);
            // Print the command usage message and exit.
            command.printUsage(command, options.getPublicOptions());
            return 1; // Exit on exception here.
        }

        // check running mode and hadoop conf dir configuration
        String mode = cmdline.getOptionValue(MODE);
        String hadoopConfDir = System.getenv(HADOOP_CONFDIR_ENV_NAME);
        if (cmdline.hasOption(HADOOP_CONF_DIR)) {
            hadoopConfDir = cmdline.getOptionValue(HADOOP_CONF_DIR);
        }

        boolean distributed = hadoopConfDir != null &&
            (mode == null || mode.equals(MODE_DISTRIBUTED));
        if (MODE_DISTRIBUTED.equalsIgnoreCase(mode) && !distributed) {
            LOG.error("Cannot run in distributed mode. HADOOP_CONF_DIR is " +
                "not configured.");
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Running in: " +
                (distributed ? "distributed" : "local") + " mode");
            if (distributed) {
                LOG.debug("HADOOP_CONF_DIR is set to " + hadoopConfDir);
            }
        }
        conf.set(EXECUTION_MODE, distributed ? MODE_DISTRIBUTED : MODE_LOCAL);

        if (distributed) {
            if (!cmdline.hasOption(SPLIT_INPUT) &&
                Command.getInputType(cmdline).equals(
                    InputType.DELIMITED_TEXT)) {
                conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);
            }
            File hdConfDir = new File(hadoopConfDir);
            try {
                checkHadoopConfDir(hdConfDir);
            } catch (IllegalArgumentException e) {
                LOG.error("Error found with Hadoop conf dir setting", e);
                System.err.println(e.getMessage());
                return 1;
            }
            // set new class loader based on Hadoop Conf Dir
            try {
                setClassLoader(hdConfDir, conf);
            } catch (Exception e) {
                LOG.error("Error configuring class loader", e);
                System.err.println(e.getMessage());
                return 1;
            }
        } else { // running in local mode
            // Tell Hadoop that we are running in local mode. This is useful
            // when the user has Hadoop home or their Hadoop conf dir in their
            // classpath but wants to run in local mode.
            conf.set(CONF_MAPREDUCE_JOBTRACKER_ADDRESS, "local");
        }

        // create job
        Job job = null;
        try {
            if (distributed) {
                // So far all jobs created by mlcp are map only,
                // so set number of reduce tasks to 0.
                conf.setInt("mapreduce.job.reduces", 0);
                // No speculative runs since speculative tasks don't get to
                // clean up sessions properly
                conf.setBoolean("mapreduce.map.speculative", false);
            } else {
                // set working directory
                conf.set(CONF_MAPREDUCE_JOB_WORKING_DIR,
                    System.getProperty("user.dir"));
            }
            job = command.createJob(conf, cmdline);
        } catch (Exception e) {
            // Print exception message.
            e.printStackTrace();
            return 1;
        }

        LOG.info("Job name: " + job.getJobName());
        // run job
        try {
            if (distributed) {
                // submit job
                submitJob(job);
            } else {
                runJobLocally(job, cmdline, command);
            }
            return 0;
        } catch (Exception e) {
            LOG.error("Error running a ContentPump job", e);
            e.printStackTrace(System.err);
            return 1;
        }
    }

    /**
     * Set the class loader for the current thread and for the Configuration
     * based on the Hadoop conf dir.
     *
     * @param hdConfDir Hadoop conf directory
     * @param conf Hadoop configuration
     * @throws MalformedURLException
     */
    private static void setClassLoader(File hdConfDir, Configuration conf)
        throws Exception {
        ClassLoader parent = conf.getClassLoader();
        URL url = hdConfDir.toURI().toURL();
        URL[] urls = new URL[1];
        urls[0] = url;
        ClassLoader classLoader = new URLClassLoader(urls, parent);
        Thread.currentThread().setContextClassLoader(classLoader);
        conf.setClassLoader(classLoader);
    }

    private static void checkHadoopConfDir(File hdConfDir)
        throws IllegalArgumentException {
        if (!hdConfDir.exists()) {
            throw new IllegalArgumentException("Hadoop conf dir " + hdConfDir +
                " is not found.");
        } else if (!hdConfDir.isDirectory()) {
            throw new IllegalArgumentException("Hadoop conf dir " + hdConfDir +
                " is not a directory.");
        } else if (!hdConfDir.canRead()) {
            throw new IllegalArgumentException("Hadoop conf dir " + hdConfDir +
                " cannot be read.");
        }
    }

    private static void submitJob(Job job) throws Exception {
        String cpHome = System.getProperty(CONTENTPUMP_HOME_PROPERTY_NAME);

        // find job jar
        File cpHomeDir = new File(cpHome);
        FilenameFilter jobJarFilter = new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                if (name.endsWith(".jar") &&
                    name.startsWith(CONTENTPUMP_JAR_PREFIX)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
        File[] cpJars = cpHomeDir.listFiles(jobJarFilter);
        if (cpJars == null || cpJars.length == 0) {
            throw new RuntimeException("Content Pump jar file " +
                "is not found under " + cpHome);
        }
        if (cpJars.length > 1) {
            throw new RuntimeException("More than one Content Pump jar file " +
                "is found under " + cpHome);
        }

        // set job jar
        Configuration conf = job.getConfiguration();
        conf.set("mapreduce.job.jar", cpJars[0].toURI().toURL().toString());

        // find lib jars
        FilenameFilter filter = new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                if (name.endsWith(".jar") && !name.startsWith("hadoop")) {
                    return true;
                } else {
                    return false;
                }
            }
        };

        // set lib jars
        StringBuilder jars = new StringBuilder();
        for (File jar : cpHomeDir.listFiles(filter)) {
            if (jars.length() > 0) {
                jars.append(',');
            }
            jars.append(jar.toURI().toURL().toString());
        }
        conf.set("tmpjars", jars.toString());
        if (LOG.isTraceEnabled()) {
            LOG.trace("LIBJARS:" + jars.toString());
        }

        job.waitForCompletion(true);
        AuditUtil.auditMlcpFinish(conf, job.getJobName(), job.getCounters());
    }

    private static void runJobLocally(Job job, CommandLine cmdline,
        Command cmd) throws Exception {
        LocalJobRunner runner = new LocalJobRunner(job, cmdline, cmd);
        runner.run();
        AuditUtil.auditMlcpFinish(job.getConfiguration(), job.getJobName(),
            runner.getReporter().counters);
    }

    private static void printUsage() {
        System.out.println("usage: mlcp COMMAND [ARGS]\n");
        System.out.println("Available commands:");
        System.out.println("  IMPORT   import data to a MarkLogic database");
        System.out.println("  EXPORT   export data from a MarkLogic database");
        System.out.println("  COPY     copy data from one MarkLogic database to another");
        System.out.println("  EXTRACT  extract data from MarkLogic forests");
        System.out.println("  HELP     list available commands");
        System.out.println("  VERSION  print the version");
    }

    public static void logVersions() {
        System.out.println("ContentPump version: " + Versions.getVersion());
        System.out.println("Java version: " +
            System.getProperty("java.version"));
        System.out.println("Hadoop version: " + VersionInfo.getVersion());
        System.out.println("Supported MarkLogic versions: " +
            Versions.getMinServerVersion() + " - " +
            Versions.getMaxServerVersion());
    }
}
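
As a quick way to exercise the entry point above, here is a minimal sketch that drives ContentPump.runCommand() directly instead of main() (which calls System.exit()). It only uses the built-in "help" and "version" commands that are visible in the code; the class name ContentPumpSmokeTest is made up for illustration, and it assumes the mlcp jar and its dependencies are on the classpath. A real IMPORT, EXPORT, COPY, or EXTRACT run would additionally need the per-command options parsed by CommandlineOptions, such as MarkLogic connection settings, which are not shown here.

import com.marklogic.contentpump.ContentPump;

public class ContentPumpSmokeTest {
    public static void main(String[] args) throws Exception {
        // "version" goes through logVersions(), printing the ContentPump,
        // Java, Hadoop, and supported MarkLogic versions, then returns 1
        // (as in the code above, help/version return a non-zero code).
        int rc = ContentPump.runCommand(new String[] { "version" });
        System.out.println("runCommand(\"version\") returned " + rc);

        // "help" prints the command summary produced by printUsage().
        ContentPump.runCommand(new String[] { "help" });
    }
}

Calling runCommand() rather than main() keeps the JVM alive after the command finishes, which is convenient when embedding or testing the entry point; main() is intended for the mlcp launcher scripts and always exits the process with the returned code.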