com.taobao.datax.engine.conf.ParseXMLUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.taobao.datax.engine.conf.ParseXMLUtil.java

Source

/**
 * (C) 2010-2011 Alibaba Group Holding Limited.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * version 2 as published by the Free Software Foundation. 
 * 
 */

package com.taobao.datax.engine.conf;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import com.taobao.datax.common.constants.Constants;
import com.taobao.datax.common.exception.DataExchangeException;
import com.taobao.datax.common.plugin.PluginParam;
import com.taobao.datax.common.util.StrUtils;
import com.taobao.datax.engine.plugin.DefaultPluginParam;

/**
 * A utility tool to parse configure XML file.
 * 
 */
public class ParseXMLUtil {
    private static final Logger LOG = Logger.getLogger(ParseXMLUtil.class);

    /**
     * Parse dataX job configuration file.
     * 
     * @param filename
     *            Job configure filename.
     * 
     * @return a JobConf instance which describes this Job configuration file.
     * 
     * */
    @SuppressWarnings("unchecked")
    public static JobConf loadJobConfig(String filename) {
        JobConf job = new JobConf();
        Document document;
        try {
            String xml = FileUtils.readFileToString(new File(filename), "UTF-8");
            document = DocumentHelper.parseText(xml);
        } catch (DocumentException e) {
            LOG.error("DataX Can not find " + filename + " .");
            throw new DataExchangeException(e.getCause());
        } catch (IOException e) {
            LOG.error(String.format("DataX read config file %s failed .", filename));
            throw new DataExchangeException(e.getCause());
        }

        String xpath = "/jobs/job";
        Element jobE = (Element) document.selectSingleNode(xpath);
        job.setId(jobE.attributeValue("id") == null ? "DataX_is_still_a_virgin" : jobE.attributeValue("id").trim());

        JobPluginConf readerJobConf = new JobPluginConf();
        Element readerE = (Element) document.selectSingleNode(xpath + "/loader");
        if (null == readerE)
            readerE = (Element) document.selectSingleNode(xpath + "/reader");

        String readerId = readerE.attributeValue("id");
        readerJobConf.setId(readerId == null ? "virgin-reader" : readerId.trim());
        Element readerPluinE = (Element) readerE.selectSingleNode("plugin");
        readerJobConf.setName(readerPluinE.getStringValue().trim().replace("loader", "reader").toLowerCase());

        Map<String, String> readerParamMap = new HashMap<String, String>();

        /*
         * for historic reason, we need to check concurrency node first add by
         * bazhen.csy
         */
        if (readerE.selectSingleNode("concurrency") != null) {
            Element readerConcurrencyE = (Element) readerE.selectSingleNode("concurrency");
            readerParamMap.put("concurrency", StrUtils.replaceString(readerConcurrencyE.attributeValue("core")));
        }

        List<Element> readerParamE = (List<Element>) readerE.selectNodes("param");
        for (Element e : readerParamE) {
            readerParamMap.put(e.attributeValue("key").toLowerCase(),
                    StrUtils.replaceString(e.attributeValue("value").trim()));
        }

        PluginParam readerPluginParam = new DefaultPluginParam(readerParamMap);

        //      if (!readerPluginParam.hasValue("concurrency")
        //            || readerPluginParam.getIntValue("concurrency", 1) < 0) {
        //         throw new IllegalArgumentException(
        //               "Reader option [concurrency] error !");
        //      }

        readerJobConf.setPluginParams(readerPluginParam);

        List<JobPluginConf> writerJobConfs = new ArrayList<JobPluginConf>();
        List<Element> writerEs = (List<Element>) document.selectNodes(xpath + "/dumper");
        if (null == writerEs || 0 == writerEs.size())
            writerEs = (List<Element>) document.selectNodes(xpath + "/writer");

        for (Element writerE : writerEs) {
            JobPluginConf writerPluginConf = new JobPluginConf();

            String writerId = writerE.attributeValue("id");
            writerPluginConf.setId(writerId == null ? "virgin-writer" : writerId.trim());

            String destructLimit = writerE.attributeValue("destructlimit");
            if (destructLimit != null) {
                writerPluginConf.setDestructLimit(Integer.valueOf(destructLimit));
            }

            Element writerPluginE = (Element) writerE.selectSingleNode("plugin");
            writerPluginConf
                    .setName(writerPluginE.getStringValue().trim().replace("dumper", "writer").toLowerCase());

            Map<String, String> writerParamMap = new HashMap<String, String>();

            /*
             * for historic reason, we need to check concurrency node add by
             * bazhen.csy
             */
            if (writerE.selectSingleNode("concurrency") != null) {
                Element writerConcurrencyE = (Element) writerE.selectSingleNode("concurrency");
                writerParamMap.put("concurrency",
                        StrUtils.replaceString(writerConcurrencyE.attributeValue("core")));
            }

            List<Element> writerParamE = (List<Element>) writerE.selectNodes("param");
            for (Element e : writerParamE) {
                writerParamMap.put(e.attributeValue("key").toLowerCase(),
                        StrUtils.replaceString(e.attributeValue("value").trim()));
            }

            PluginParam writerPluginParam = new DefaultPluginParam(writerParamMap);

            writerPluginConf.setPluginParams(writerPluginParam);
            writerJobConfs.add(writerPluginConf);
        }

        job.setReaderConf(readerJobConf);
        job.setWriterConfs(writerJobConfs);

        return job;
    }

    /**
     * Parse plugins configuration file, get all plugin configurations
     * 
     * @return a map mapping plugin name to plugin configurations
     * */
    @SuppressWarnings("unchecked")
    public static Map<String, PluginConf> loadPluginConfig() {
        Map<String, PluginConf> plugins = new HashMap<String, PluginConf>();
        File file = new File(Constants.PLUGINSXML);
        SAXReader saxReader = new SAXReader();
        Document document = null;
        try {
            document = saxReader.read(file);
        } catch (DocumentException e) {
            LOG.error("DataX Can not find " + Constants.PLUGINSXML);
            throw new DataExchangeException(e.getCause());
        }
        String xpath = "/plugins/plugin";
        List<Node> pluginnode = (List<Node>) document.selectNodes(xpath);
        for (Node node : pluginnode) {
            PluginConf plugin = new PluginConf();
            plugin.setVersion(node.selectSingleNode("./version").getStringValue());
            plugin.setName(node.selectSingleNode("./name").getStringValue());
            plugin.setTarget(node.selectSingleNode("./target").getStringValue());
            plugin.setJar(node.selectSingleNode("./jar").getStringValue());
            plugin.setType(node.selectSingleNode("./type").getStringValue());
            plugin.setClassName(node.selectSingleNode("./class").getStringValue());
            plugin.setMaxthreadnum(Integer.parseInt(node.selectSingleNode("./maxthreadnum").getStringValue()));
            if (node.selectSingleNode("./path") != null)
                plugin.setPath(node.selectSingleNode("./path").getStringValue());
            plugins.put(plugin.getName(), plugin);
        }
        return plugins;
    }

    /**
     * Parse engine configuration file
     * 
     * @return {@link EngineConf}.
     * 
     * */
    public static EngineConf loadEngineConfig() {

        EngineConf engineConf = EngineConf.getInstance();
        File file = new File(Constants.ENGINEXML);
        SAXReader saxReader = new SAXReader();
        Document document = null;

        try {
            document = saxReader.read(file);
        } catch (DocumentException e) {
            LOG.error("DataX Can not find " + Constants.ENGINEXML + " , program exits .");
            throw new DataExchangeException(e);
        }

        engineConf.setVersion(Integer.parseInt(document.selectSingleNode("/engine/version").getStringValue()));

        engineConf.setStorageClassName(document.selectSingleNode("/engine/storage/class").getStringValue());
        engineConf.setStorageLineLimit(StrUtils.getIntParam(
                document.selectSingleNode("/engine/storage/linelimit").getStringValue(), 10000, 100, 1000000));
        engineConf.setStorageByteLimit(
                StrUtils.getIntParam(document.selectSingleNode("/engine/storage/bytelimit").getStringValue(),
                        1000000, 10000, 100000000));
        engineConf.setStorageBufferSize(StrUtils
                .getIntParam(document.selectSingleNode("/engine/storage/buffersize").getStringValue(), 10, 1, 500));

        engineConf.setPluginRootPath(document.selectSingleNode("/engine/pluginrootpath").getStringValue());

        return engineConf;
    }

    /**
     * Parse engine configuration file, and get DataX Statistics report url.
     * NOTE: This feature can be abandon.
     * 
     * @return DataX statistics report URL
     * 
     * */
    public static String loadScheduleURL() {
        String url = "";
        File file = new File(Constants.ENGINEXML);
        SAXReader saxReader = new SAXReader();
        Document document = null;
        try {
            document = saxReader.read(file);
            url = document.selectSingleNode("/engine/schedule/url").getStringValue();
        } catch (DocumentException e) {
            LOG.error("Can not find " + Constants.ENGINEXML + ", please check it again.");
            throw new DataExchangeException(e.getCause());
        }
        return url.trim();
    }
}