co.nubetech.hiho.job.ExportToOracleDb.java Source code

Java tutorial

Introduction

Here is the source code for co.nubetech.hiho.job.ExportToOracleDb.java

Source

/**
 * Copyright 2010 Nube Technologies
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at 
 * 
 * http://www.apache.org/licenses/LICENSE-2.0 
 * 
 * Unless required by applicable law or agreed to in writing, software distributed 
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 * CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and limitations under the License. 
 */
package co.nubetech.hiho.job;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.Map.Entry;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

import co.nubetech.apache.hadoop.DBConfiguration;
import co.nubetech.apache.hadoop.MRJobConfig;
import co.nubetech.hiho.common.HIHOConf;
import co.nubetech.hiho.common.HIHOException;
import co.nubetech.hiho.mapreduce.OracleLoadMapper;
import co.nubetech.hiho.mapreduce.lib.input.FileStreamInputFormat;

public class ExportToOracleDb extends Configured implements Tool {
    private final static Logger logger = Logger.getLogger(co.nubetech.hiho.job.ExportToOracleDb.class);

    private String inputPath = null;

    public void populateConfiguration(String[] args, Configuration conf) {
        for (int i = 0; i < args.length - 1; i++) {
            if ("-inputPath".equals(args[i])) {
                inputPath = args[++i];
            } else if ("-oracleFtpAddress".equals(args[i])) {
                conf.set(HIHOConf.ORACLE_FTP_ADDRESS, args[++i]);
            } else if ("-oracleFtpPortNumber".equals(args[i])) {
                conf.set(HIHOConf.ORACLE_FTP_PORT, args[++i]);
            } else if ("-oracleFtpUserName".equals(args[i])) {
                conf.set(HIHOConf.ORACLE_FTP_USER, args[++i]);
            } else if ("-oracleFtpPassword".equals(args[i])) {
                conf.set(HIHOConf.ORACLE_FTP_PASSWORD, args[++i]);
            } else if ("-oracleExternalTableDirectory".equals(args[i])) {
                conf.set(HIHOConf.ORACLE_EXTERNAL_TABLE_DIR, args[++i]);
            } else if ("-driver".equals(args[i])) {
                conf.set(DBConfiguration.DRIVER_CLASS_PROPERTY, args[++i]);
            } else if ("-url".equals(args[i])) {
                conf.set(DBConfiguration.URL_PROPERTY, args[++i]);
            } else if ("-userName".equals(args[i])) {
                conf.set(DBConfiguration.USERNAME_PROPERTY, args[++i]);
            } else if ("-password".equals(args[i])) {
                conf.set(DBConfiguration.PASSWORD_PROPERTY, args[++i]);
            } else if ("-externalTable".equals(args[i])) {
                conf.set(HIHOConf.EXTERNAL_TABLE_DML, args[++i]);
            }
        }
    }

    public void checkMandatoryConfs(Configuration conf) throws HIHOException {
        if (inputPath == null) {
            throw new HIHOException("The provided inputPath is empty, please specify inputPath");
        }
        if (conf.get(HIHOConf.ORACLE_FTP_USER) == null) {
            throw new HIHOException("The Oracle FTP UserName is not specified, please specify Oracle FTP UserName");
        }
        if (conf.get(HIHOConf.ORACLE_FTP_ADDRESS) == null) {
            throw new HIHOException("The Oracle FTP Address is not specified, please specify Oracle FTP Address");
        }
        if (conf.get(HIHOConf.ORACLE_FTP_PORT) == null) {
            throw new HIHOException(
                    "The Oracle FTP Port Number is not defined, please specify Oracle FTP Port Number");
        }
        if (conf.get(HIHOConf.ORACLE_FTP_PASSWORD) == null) {
            throw new HIHOException("The Oracle FTP Password is not defined, please specify Oracle FTP Password");
        }
        if (conf.get(HIHOConf.ORACLE_EXTERNAL_TABLE_DIR) == null) {
            throw new HIHOException(
                    "The Oracle External Table Directory is not specified, please specify Oracle External Table Directory");
        }
        if (conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY) == null) {
            throw new HIHOException("The JDBC Driver is not specified, please specify JDBC Driver");
        }
        if (conf.get(DBConfiguration.URL_PROPERTY) == null) {
            throw new HIHOException("The JDBC URL is not specified, please specify JDBC URL");
        }
        if (conf.get(DBConfiguration.USERNAME_PROPERTY) == null) {
            throw new HIHOException("The JDBC USERNAME is not specified, please specify JDBC USERNAME");
        }
        if (conf.get(DBConfiguration.PASSWORD_PROPERTY) == null) {
            throw new HIHOException("The JDBC PASSWORD is not specified, please specify JDBC PASSWORD");
        }
        if (conf.get(HIHOConf.EXTERNAL_TABLE_DML) == null) {
            throw new HIHOException(
                    "The query to create external table is not specified, please specify the create query for external table");
        }

    }

    @Override
    public int run(String[] args) throws IOException {
        Configuration conf = getConf();
        for (Entry<String, String> entry : conf) {
            logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
        }

        for (int i = 0; i < args.length; i++) {
            logger.debug("Remaining arguments are" + " " + args[i]);
        }
        populateConfiguration(args, conf);

        try {
            checkMandatoryConfs(conf);
        } catch (HIHOException e1) {
            e1.printStackTrace();
            throw new IOException(e1);
        }

        Job job = new Job(conf);
        job.setJobName("OracleLoading");
        job.setMapperClass(OracleLoadMapper.class);
        job.setJarByClass(ExportToOracleDb.class);
        job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));

        try {
            // we first create the external table definition
            String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML);
            // create table if user has specified
            if (query != null) {
                this.runQuery(query, conf);
            }
        } catch (HIHOException e1) {

            e1.printStackTrace();
        }

        // verify required properties are loaded

        job.setNumReduceTasks(0);
        job.setInputFormatClass(FileStreamInputFormat.class);
        FileStreamInputFormat.addInputPath(job, new Path(inputPath));
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        // job.setJarByClass(com.mysql.jdbc.Driver.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        int ret = 0;
        try {

            ret = job.waitForCompletion(true) ? 0 : 1;
        } catch (Exception e) {
            e.printStackTrace();
        }
        // run alter table query and add locations
        try {
            this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf);
        } catch (HIHOException e1) {

            e1.printStackTrace();
        }
        return ret;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new ExportToOracleDb(), args);
        System.exit(res);
    }

    public static String getAlterTableDML(Path inputPath, Configuration conf) throws IOException, HIHOException {
        // after running the job, we need to alter the external table to take
        // care of the files we have added
        FileStatus[] contents = inputPath.getFileSystem(conf).listStatus(inputPath);
        if (contents != null) {
            StringBuilder dml = new StringBuilder();
            dml.append(" ALTER TABLE ");
            dml.append(getTableName(conf.get(HIHOConf.EXTERNAL_TABLE_DML)));
            dml.append(" LOCATION (");
            int i = 0;
            for (FileStatus content : contents) {
                String fileName = content.getPath().getName();
                dml.append("\'");
                dml.append(fileName);
                dml.append("\'");
                // add comma uptil one and last
                if (i < contents.length - 1) {
                    dml.append(",");
                }
                i++;
            }
            // execute dml to alter location

            dml.append(")");
            return dml.toString();
        }
        return null;
    }

    public static String getTableName(String query) throws HIHOException {
        String tableName = null;
        if (query == null)
            throw new HIHOException("Cannot read query");

        try {
            StringTokenizer tokenizer = new StringTokenizer(query, "() ");
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (token.equalsIgnoreCase("table")) {
                    tableName = tokenizer.nextToken();
                }
            }
        } catch (Exception e) {
            throw new HIHOException("Unable to get table name from the external table query");
        }
        if (tableName == null) {
            throw new HIHOException("Unable to get table name from the external table query");
        }
        return tableName;
    }

    public static String getExternalDir(String query) throws HIHOException {
        String tableName = null;
        if (query == null)
            throw new HIHOException("Cannot read query");

        try {
            StringTokenizer tokenizer = new StringTokenizer(query, " ");
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (token.equalsIgnoreCase("directory")) {
                    tableName = tokenizer.nextToken();
                }
            }
        } catch (Exception e) {
            throw new HIHOException("Unable to get directory name from the external table query");
        }
        if (tableName == null) {
            throw new HIHOException("Unable to get directory name from the external table query");
        }
        return tableName;
    }

    /**
     * This function creates the external table. the following queries have been
     * tested: create table age( i Number, n Varchar(20), a Number )
     * organization external ( type oracle_loader default directory ext_dir
     * access parameters ( records delimited by newline fields terminated by ','
     * missing field values are null ) location('file.txt') ) reject limit
     * unlimited
     * 
     * 
     * The following queries are bound to fail create table ( i Number, n
     * Varchar(20), a Number ) organization external ( type oracle_loader
     * default directory ext_dir access parameters ( records delimited by
     * newline fields terminated by ',' missing field values are null )
     * location( 'file.txt' ) ) reject limit unlimited
     * 
     * @param query
     * @param conf
     * @throws HIHOException
     */
    public void runQuery(String query, Configuration conf) throws HIHOException {
        try {
            String driver = conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY);
            String url = conf.get(DBConfiguration.URL_PROPERTY);
            String usr = conf.get(DBConfiguration.USERNAME_PROPERTY);
            String pswd = conf.get(DBConfiguration.PASSWORD_PROPERTY);

            Class.forName(driver);
            Connection conn = DriverManager.getConnection(url, usr, pswd);

            Statement stmt = conn.createStatement();

            stmt.executeQuery(query);

            stmt.close();
            conn.close();

        } catch (Exception e) {
            e.printStackTrace();
            throw new HIHOException("Sql syntax error in query " + query + e);
        }

    }

}