Java tutorial: inside DataX's Engine scheduler
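This tutorial walks through Engine, the core scheduling class of Alibaba's DataX data-exchange tool (package com.taobao.datax.engine.schedule). The engine loads a Reader plugin and one or more Writer plugins, wires them together through an in-memory StoragePool, and supervises the worker thread pools until the job finishes. The full source follows, with notes after each major method.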
/**
 * (C) 2010-2011 Alibaba Group Holding Limited.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 */
package com.taobao.datax.engine.schedule;

import com.taobao.datax.common.constants.ExitStatus;
import com.taobao.datax.common.exception.DataExchangeException;
import com.taobao.datax.common.exception.ExceptionTracker;
import com.taobao.datax.common.plugin.PluginParam;
import com.taobao.datax.common.plugin.Pluginable;
import com.taobao.datax.common.plugin.Reader;
import com.taobao.datax.common.plugin.Writer;
import com.taobao.datax.engine.conf.*;
import com.taobao.datax.engine.plugin.BufferedLineExchanger;
import com.taobao.datax.engine.storage.Storage;
import com.taobao.datax.engine.storage.StoragePool;
import com.taobao.datax.engine.tools.JobConfGenDriver;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

/**
 * Core class of DataX; schedules {@link Reader} and {@link Writer} plugins.
 */
public class Engine {

    private static final Logger logger = Logger.getLogger(Engine.class);

    /* Number of one-second polling rounds between progress reports. */
    private static final int PERIOD = 10;

    private static final int MAX_CONCURRENCY = 64;

    private EngineConf engineConf;

    private Map<String, PluginConf> pluginReg;

    private MonitorPool readerMonitorPool;

    private MonitorPool writerMonitorPool;

    private Map<String, Class<?>> pluginClassCache = new HashMap<String, Class<?>>();

    private static Map<String, JarLoader> jarLoaderCache = new HashMap<String, JarLoader>();

    /**
     * Constructor for {@link Engine}.
     *
     * @param engineConf
     *            configuration for the {@link Engine}
     * @param pluginReg
     *            configurations for each {@link Pluginable}
     */
    public Engine(EngineConf engineConf, Map<String, PluginConf> pluginReg) {
        this.engineConf = engineConf;
        this.pluginReg = pluginReg;

        this.writerMonitorPool = new MonitorPool();
        this.readerMonitorPool = new MonitorPool();
    }

    /**
     * Starts a DataX job.
     *
     * @param jobConf
     *            configuration for the DataX job
     *
     * @return 0 on success, any other value on failure
     *
     * @throws Exception
     */
    public int start(JobConf jobConf) throws Exception {
        logger.info('\n' + engineConf.toString() + '\n');
        logger.info('\n' + jobConf.toString() + '\n');
        logger.info("DataX starts up.");
        logger.info("Reader plugin: " + jobConf.getReaderConf().getName());

        StoragePool storagePool = new StoragePool(jobConf, engineConf, PERIOD);

        NamedThreadPoolExecutor readerPool = initReaderPool(jobConf, storagePool);
        List<NamedThreadPoolExecutor> writerPool = initWriterPool(jobConf, storagePool);

        logger.info("DataX starts to exchange data.");
        readerPool.shutdown();
        for (NamedThreadPoolExecutor dp : writerPool) {
            dp.shutdown();
        }

        int sleepCnt = 0;
        int retcode = 0;
        boolean hasError = false;
        String errorMsg = "";
        NamedThreadPoolExecutor[] dps = writerPool.toArray(new NamedThreadPoolExecutor[0]);

        while (true) {
            /* Has the reader finished? */
            boolean readerFinish = readerPool.isTerminated();
            hasError = readerPool.hasError();
            errorMsg = readerPool.getErrorMsg();
            if (readerFinish) {
                storagePool.closeInput();
            }

            boolean writerAllFinish = true;
            /* Check each writer pool. */
            for (NamedThreadPoolExecutor dp : dps) {
                hasError = hasError || dp.hasError();
                if (hasError) {
                    errorMsg = StringUtils.isNotBlank(errorMsg) ? errorMsg : dp.getErrorMsg();
                    readerPool.shutdownNow();
                    readerPool.awaitTermination(1, TimeUnit.SECONDS);
                    for (NamedThreadPoolExecutor dp2 : writerPool) {
                        dp2.shutdownNow();
                        dp2.awaitTermination(1, TimeUnit.SECONDS);
                    }
                    throw new DataExchangeException(errorMsg);
                }
                if (!readerFinish && dp.isTerminated()) {
                    logger.error(String.format("DataX Writer %s failed.", dp.getName()));
                    writerPool.remove(dp);
                } else if (!dp.isTerminated()) {
                    writerAllFinish = false;
                }
            }

            if (readerFinish && writerAllFinish) {
                logger.info("DataX Reader post work begins.");
                readerPool.doPost();
                logger.info("DataX Reader post work ends.");

                logger.info("DataX Writers post work begins.");
                for (NamedThreadPoolExecutor dp : writerPool) {
                    dp.getParam().setOppositeMetaData(readerPool.getParam().getMyMetaData());
                    dp.doPost();
                }
                logger.info("DataX Writers post work ends.");
                logger.info("DataX job succeeded.");
                break;
            } else if (!readerFinish && writerAllFinish) {
                logger.error("DataX Writers finished before the reader finished.");
                logger.error("DataX job failed.");
                readerPool.shutdownNow();
                readerPool.awaitTermination(3, TimeUnit.SECONDS);
                throw new DataExchangeException("DataX Writers finished before the reader finished.");
            }

            Thread.sleep(1000);
            sleepCnt++;
            if (sleepCnt % PERIOD == 0) {
                /* Report the active thread count of each pool. */
                StringBuilder sb = new StringBuilder();
                sb.append(String.format("ReaderPool %s: Active Threads %d.",
                        readerPool.getName(), readerPool.getActiveCount()));
                logger.info(sb.toString());
                sb.setLength(0);
                for (NamedThreadPoolExecutor perWriterPool : writerPool) {
                    sb.append(String.format("WriterPool %s: Active Threads %d.",
                            perWriterPool.getName(), perWriterPool.getActiveCount()));
                    logger.info(sb.toString());
                    sb.setLength(0);
                }
                /* Report periodic storage throughput. */
                logger.info(storagePool.getPeriodState());
            }
        }

        /* Double-check the error status once the loop has exited. */
        logger.info("check status twice begin");
        hasError = readerPool.hasError();
        errorMsg = readerPool.getErrorMsg();
        logger.info("dps length:" + dps.length);
        for (NamedThreadPoolExecutor dp : dps) {
            hasError = hasError || dp.hasError();
            if (hasError) {
                errorMsg = StringUtils.isNotBlank(errorMsg) ? errorMsg : dp.getErrorMsg();
                throw new DataExchangeException(errorMsg);
            }
        }
        logger.info("check status twice end");

        StringBuilder sb = new StringBuilder();
        sb.append(storagePool.getTotalStat());
        long discardLine = this.writerMonitorPool.getDiscardLine();
        sb.append(String.format("%-26s: %19d\n", "Total discarded records", discardLine));
        logger.info(sb.toString());

        Reporter.stat.put("DISCARD_RECORDS", String.valueOf(discardLine));
        Reporter reporter = Reporter.instance();
        reporter.report(jobConf);

        /* If the storages drained by the writers report different line counts,
         * at least one writer wrote only part of the data. */
        long total = -1;
        boolean writePartlyFailed = false;
        for (Storage s : storagePool.getStorageForReader()) {
            String[] lineCounts = s.info().split(":");
            long lineTx = Long.parseLong(lineCounts[1]);
            if (total != -1 && total != lineTx) {
                writePartlyFailed = true;
                logger.error("Writer partly failed, for " + total + " != " + lineTx);
            }
            total = lineTx;
        }

        return writePartlyFailed ? 200 : retcode;
    }
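The loop in start() is the heart of the engine. Calling shutdown() on the pools does not stop any running worker; it only stops the pools accepting new tasks, so isTerminated() becomes true once every submitted worker has finished. The main thread then polls once per second: it aborts the whole job on the first error it sees, closes the storage input as soon as the reader terminates (so the writers can drain the remaining lines and finish), and prints pool activity and storage throughput every PERIOD (10) rounds. NamedThreadPoolExecutor itself is not part of this listing. Below is a minimal sketch of such a wrapper, assuming it adds little more than a name and a sticky error flag on top of ThreadPoolExecutor; the real class also carries the post-worker and plugin parameters used above, and the class name and details here are illustrative, not the real DataX implementation.

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class NamedThreadPoolExecutorSketch extends ThreadPoolExecutor {
    private final String name;
    private volatile boolean hasError = false;
    private volatile String errorMsg = "";

    public NamedThreadPoolExecutorSketch(String name, int corePoolSize,
            int maxPoolSize, long keepAlive, TimeUnit unit) {
        super(corePoolSize, maxPoolSize, keepAlive, unit,
                new LinkedBlockingQueue<Runnable>());
        this.name = name;
    }

    public String getName() { return name; }
    public boolean hasError() { return hasError; }
    public String getErrorMsg() { return errorMsg; }

    /* Record the first failure so a polling loop like the one in start()
     * can abort the job. Note that afterExecute only sees the Throwable for
     * tasks started with execute(); tasks submitted as Futures capture it
     * in the Future instead. */
    @Override
    protected void afterExecute(Runnable r, Throwable t) {
        super.afterExecute(r, t);
        if (t != null && !hasError) {
            hasError = true;
            errorMsg = t.getMessage();
        }
    }
}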
    /**
     * Configures the log4j environment. Logs go to logs/yyyy-MM-dd/ with the
     * file name jobId.HHmmss.log, relative to the working directory.
     *
     * @param jobId
     *            DataX job id
     */
    public static void confLog(String jobId) {
        java.util.Calendar c = java.util.Calendar.getInstance();
        java.text.SimpleDateFormat f = new java.text.SimpleDateFormat("yyyy-MM-dd");
        String logDir = "logs/" + f.format(c.getTime());
        System.setProperty("log.dir", logDir);

        f = new java.text.SimpleDateFormat("HHmmss");
        String logFile = jobId + "." + f.format(c.getTime()) + ".log";
        System.setProperty("log.file", logFile);

        PropertyConfigurator.configure("conf/log4j.properties");
    }

    private NamedThreadPoolExecutor initReaderPool(JobConf jobConf, StoragePool sp) throws Exception {
        JobPluginConf readerJobConf = jobConf.getReaderConf();
        String pluginName = readerJobConf.getName();
        logger.info("pluginName:" + pluginName);
        PluginParam sparam = readerJobConf.getPluginParams();

        /* If the hdfsreader job carries a hive_sql parameter, switch to the hive reader. */
        logger.info("replace hdfs reader with hive reader check");
        if ("hdfsreader".equals(pluginName) && StringUtils.isNotBlank(sparam.getValue("hive_sql", ""))) {
            pluginName = "hivereader";
            logger.info("replaced hdfs reader with hive reader");
        }

        PluginConf readerConf = pluginReg.get(pluginName);
        String pluginPath = readerConf.getPath();
        if (StringUtils.isEmpty(pluginPath)) {
            pluginPath = engineConf.getPluginRootPath() + "reader/" + pluginName;
            readerConf.setPath(pluginPath);
        }
        logger.info("path:" + pluginPath);

        Class<?> myClass = pluginClassCache.get(pluginPath);
        if (myClass == null) {
            logger.info(String.format("DataX Reader %s tries to load path %s.",
                    readerConf.getName(), pluginPath));
            JarLoader jarLoader = getJarLoader(pluginPath);
            myClass = jarLoader.loadClass(readerConf.getClassName());
            pluginClassCache.put(pluginPath, myClass);
        }

        ReaderWorker readerWorkerForPreAndPost = new ReaderWorker(readerConf, myClass);
        readerWorkerForPreAndPost.setParam(sparam);
        readerWorkerForPreAndPost.init();

        logger.info("DataX Reader prepare work begins.");
        int code = readerWorkerForPreAndPost.prepare(sparam);
        if (code != 0) {
            throw new DataExchangeException("DataX Reader prepare work failed!");
        }
        logger.info("DataX Reader prepare work ends.");

        logger.info("DataX Reader split work begins.");
        List<PluginParam> readerSplitParams = readerWorkerForPreAndPost.doSplit(sparam);
        logger.info(String.format("DataX Reader splits this job into %d sub-jobs.",
                readerSplitParams.size()));
        logger.info("DataX Reader split work ends.");

        int concurrency = readerJobConf.getConcurrency();
        if (concurrency <= 0 || concurrency > MAX_CONCURRENCY) {
            throw new IllegalArgumentException(String.format(
                    "Reader concurrency is set to %d; it must be between [%d, %d].",
                    concurrency, 1, MAX_CONCURRENCY));
        }

        concurrency = Math.min(concurrency, readerSplitParams.size());
        if (concurrency <= 0) {
            concurrency = 1;
        }
        readerJobConf.setConcurrency(concurrency);

        NamedThreadPoolExecutor readerPool = new NamedThreadPoolExecutor(readerJobConf.getId(),
                readerJobConf.getConcurrency(), readerJobConf.getConcurrency(),
                1L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());

        readerPool.setPostWorker(readerWorkerForPreAndPost);
        readerPool.setParam(sparam);
        readerPool.prestartAllCoreThreads();

        logger.info("DataX Reader starts to read data.");
        for (PluginParam param : readerSplitParams) {
            ReaderWorker readerWorker = new ReaderWorker(readerConf, pluginClassCache.get(pluginPath));
            readerWorker.setParam(param);
            readerWorker.setLineSender(new BufferedLineExchanger(null,
                    sp.getStorageForReader(), this.engineConf.getStorageBufferSize()));
            readerPool.submitJob(readerWorker);
            readerMonitorPool.monitor(readerWorker);
        }

        return readerPool;
    }
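Note how the pool size is derived: the configured concurrency must lie in [1, MAX_CONCURRENCY], but it is then clamped to the number of sub-jobs the reader's doSplit() produced, with a floor of one thread. A small self-contained demonstration of that clamping rule follows; the class name and the numbers are made up for illustration, the logic mirrors initReaderPool (initWriterPool uses the same rule).

public class ConcurrencyClampDemo {
    static final int MAX_CONCURRENCY = 64;

    static int clamp(int configured, int splits) {
        if (configured <= 0 || configured > MAX_CONCURRENCY) {
            throw new IllegalArgumentException(String.format(
                    "concurrency %d must be between [1, %d]", configured, MAX_CONCURRENCY));
        }
        int concurrency = Math.min(configured, splits);
        return concurrency <= 0 ? 1 : concurrency;
    }

    public static void main(String[] args) {
        System.out.println(clamp(8, 3)); // 3: never run more threads than sub-jobs
        System.out.println(clamp(4, 0)); // 1: keep at least one thread even with no splits
    }
}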
    private List<NamedThreadPoolExecutor> initWriterPool(JobConf jobConf, StoragePool sp) throws Exception {
        List<NamedThreadPoolExecutor> writerPoolList = new ArrayList<NamedThreadPoolExecutor>();
        List<JobPluginConf> writerJobConfs = jobConf.getWriterConfs();

        for (JobPluginConf dpjc : writerJobConfs) {
            PluginConf writerConf = pluginReg.get(dpjc.getName());

            if (writerConf.getPath() == null) {
                writerConf.setPath(engineConf.getPluginRootPath() + "writer/" + writerConf.getName());
            }
            String pluginPath = writerConf.getPath();

            PluginParam writerParam = dpjc.getPluginParams();

            Class<?> myClass = pluginClassCache.get(pluginPath);
            if (myClass == null) {
                logger.info(String.format("DataX Writer %s tries to load path %s.",
                        writerConf.getName(), pluginPath));
                if (pluginPath.endsWith("oraclewriter")) {
                    /* The Oracle writer uses JNI, so it is loaded by the application
                     * class loader instead of a separate JarLoader. */
                    logger.info("oraclewriter class load");
                    myClass = Class.forName("com.taobao.datax.plugins.writer.oraclewriter.OracleWriter");
                } else {
                    JarLoader jarLoader = getJarLoader(pluginPath);
                    myClass = jarLoader.loadClass(writerConf.getClassName());
                }
                pluginClassCache.put(pluginPath, myClass);
            }

            WriterWorker writerWorkerForPreAndPost = new WriterWorker(writerConf, myClass);
            writerWorkerForPreAndPost.setParam(writerParam);
            writerWorkerForPreAndPost.init();

            logger.info("DataX Writer prepare work begins.");
            int code = writerWorkerForPreAndPost.prepare(writerParam);
            if (code != 0) {
                throw new DataExchangeException("DataX Writer prepare work failed!");
            }
            logger.info("DataX Writer prepare work ends.");

            logger.info("DataX Writer split work begins.");
            List<PluginParam> writerSplitParams = writerWorkerForPreAndPost.doSplit(writerParam);
            logger.info(String.format("DataX Writer splits this job into %d sub-jobs.",
                    writerSplitParams.size()));
            logger.info("DataX Writer split work ends.");

            int concurrency = dpjc.getConcurrency();
            if (concurrency <= 0 || concurrency > MAX_CONCURRENCY) {
                throw new IllegalArgumentException(String.format(
                        "Writer concurrency is set to %d; it must be between [%d, %d].",
                        concurrency, 1, MAX_CONCURRENCY));
            }

            concurrency = Math.min(dpjc.getConcurrency(), writerSplitParams.size());
            if (concurrency <= 0) {
                concurrency = 1;
            }
            dpjc.setConcurrency(concurrency);

            NamedThreadPoolExecutor writerPool = new NamedThreadPoolExecutor(
                    dpjc.getName() + "-" + dpjc.getId(),
                    dpjc.getConcurrency(), dpjc.getConcurrency(),
                    1L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());

            writerPool.setPostWorker(writerWorkerForPreAndPost);
            writerPool.setParam(writerParam);
            writerPool.prestartAllCoreThreads();
            writerPoolList.add(writerPool);

            logger.info("DataX Writer starts to write data.");
            for (PluginParam pp : writerSplitParams) {
                WriterWorker writerWorker = new WriterWorker(writerConf, pluginClassCache.get(pluginPath));
                writerWorker.setParam(pp);
                writerWorker.setLineReceiver(new BufferedLineExchanger(
                        sp.getStorageForWriter(dpjc.getId()), null,
                        this.engineConf.getStorageBufferSize()));
                writerPool.submitJob(writerWorker);
                writerMonitorPool.monitor(writerWorker);
            }
        }

        return writerPoolList;
    }
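Readers and writers never call each other directly: each ReaderWorker gets a BufferedLineExchanger that sends lines into the StoragePool, and each WriterWorker gets one that drains the storage assigned to its writer job id. Neither BufferedLineExchanger nor Storage is shown in this listing. A minimal sketch of the exchange idea, assuming the storage is essentially a bounded blocking queue of lines with an end-of-input signal; all names here are illustrative, not the real DataX API.

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

public class LineExchangerSketch {
    private final BlockingQueue<String> buffer;
    private volatile boolean closed = false;

    public LineExchangerSketch(int bufferSize) {
        this.buffer = new ArrayBlockingQueue<String>(bufferSize);
    }

    /* Reader side: blocks when the buffer is full, giving back-pressure. */
    public void send(String line) throws InterruptedException {
        buffer.put(line);
    }

    /* Writer side: returns null once the input is closed and drained. */
    public String receive() throws InterruptedException {
        while (true) {
            String line = buffer.poll(10, TimeUnit.MILLISECONDS);
            if (line != null) return line;
            if (closed && buffer.isEmpty()) return null;
        }
    }

    /* Engine side: what storagePool.closeInput() signals in start() above. */
    public void close() { closed = true; }
}

The bounded buffer is the important design point: a fast reader blocks once the buffer fills instead of exhausting memory, which is presumably what the storageBufferSize value passed to BufferedLineExchanger above controls.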
    /**
     * Program entry.
     * <br/>NOTE: the DataX process exit codes are:
     * <br/>0: job succeeded
     * <br/>1: job failed
     * <br/>2: job failed transiently (e.g. a connection was interrupted);
     * rerunning it in a few seconds may succeed.
     *
     * @param args
     *            command line arguments
     *
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        String jobDescFile = null;
        if (args.length < 1) {
            /* With no arguments, generate a job configuration template instead. */
            System.exit(JobConfGenDriver.produceXmlConf());
        } else if (args.length == 1) {
            jobDescFile = args[0];
        } else {
            System.out.println("Usage: java -jar engine.jar job.xml");
            System.exit(ExitStatus.FAILED.value());
        }

        /* Configure logging with a placeholder id until the job id is known. */
        confLog("BEFORE_CHRIST");
        JobConf jobConf = ParseXMLUtil.loadJobConfig(jobDescFile);
        confLog(jobConf.getId());

        EngineConf engineConf = ParseXMLUtil.loadEngineConfig();
        Map<String, PluginConf> pluginConfs = ParseXMLUtil.loadPluginConfig();

        Engine engine = new Engine(engineConf, pluginConfs);

        int retcode = 0;
        try {
            retcode = engine.start(jobConf);
        } catch (Exception e) {
            logger.error(ExceptionTracker.trace(e));
            System.exit(ExitStatus.FAILED.value());
        }
        System.exit(retcode);
    }

    private synchronized static JarLoader getJarLoader(String path) {
        JarLoader jarLoader = jarLoaderCache.get(path);
        if (jarLoader == null) {
            jarLoader = new JarLoader(new String[] { path });
            jarLoaderCache.put(path, jarLoader);
        }
        return jarLoader;
    }
}
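To run a job: java -jar engine.jar job.xml. Invoked with no arguments, main() calls JobConfGenDriver.produceXmlConf() to generate a job configuration template instead; with more than one argument it prints the usage line and exits with ExitStatus.FAILED. Plugin classes are loaded through per-path JarLoader instances that getJarLoader() caches, with the synchronized keyword guarding the static cache. JarLoader itself is not included in this listing; below is a minimal sketch under the assumption that it simply exposes the jars in a plugin directory through a URLClassLoader, so the real class may well differ.

import java.io.File;
import java.io.FileFilter;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;

public class JarLoaderSketch extends URLClassLoader {
    public JarLoaderSketch(String[] paths) throws Exception {
        super(toUrls(paths), JarLoaderSketch.class.getClassLoader());
    }

    /* Collect every .jar file under each given plugin directory. */
    private static URL[] toUrls(String[] paths) throws Exception {
        List<URL> urls = new ArrayList<URL>();
        for (String path : paths) {
            File[] jars = new File(path).listFiles(new FileFilter() {
                public boolean accept(File f) {
                    return f.getName().endsWith(".jar");
                }
            });
            if (jars == null) continue; // not a directory, or unreadable
            for (File jar : jars) {
                urls.add(jar.toURI().toURL());
            }
        }
        return urls.toArray(new URL[0]);
    }
}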