org.apache.hive.hplsql.Copy.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hive.hplsql.Copy.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hive.hplsql.Var;
import org.antlr.v4.runtime.ParserRuleContext;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang3.tuple.Pair;

public class Copy {

    // Interpreter context: evaluates expressions, runs queries, hands out connections and reports messages
    Exec exec;
    // Measures how long a copy operation takes
    Timer timer = new Timer();
    // Cached from exec.getTrace() in the constructor
    boolean trace = false;
    // Cached from exec.getInfo() in the constructor
    boolean info = false;

    // Total size of the local source files; reset in initFileOptions and accumulated in createLocalFileList
    long srcSizeInBytes = 0;

    // Column delimiter for file output; a tab by default, ", " when SQLINSERT is set
    String delimiter = "\t";
    // When true, copyToFile writes each row as an INSERT statement
    boolean sqlInsert = false;
    // Table name placed after INSERT INTO in generated statements
    String sqlInsertName;
    // Target connection profile name (AT option); when set, run() copies to a table instead of a file
    String targetConn;
    // Rows per JDBC batch in copyToTable; a value of 1 or less executes every row individually
    int batchSize = 1000;

    // Passed as the overwrite flag to FileSystem.copyFromLocalFile in runFromLocal
    boolean overwrite = false;
    // Passed as the delSrc flag to FileSystem.copyFromLocalFile in runFromLocal
    boolean delete = false;
    // When true, runFromLocal counts a file that failed to copy and continues instead of aborting
    boolean ignore = false;

    /**
     * Creates a COPY handler bound to the given interpreter context and
     * caches that context's trace and info flags.
     */
    Copy(Exec e) {
        this.exec = e;
        this.trace = e.getTrace();
        this.info = e.getInfo();
    }

    /**
     * Executes the COPY statement: reads the options, runs the source query
     * (either the given SELECT or a SELECT * over the given table) and sends
     * the result to a table when a target connection was specified, otherwise
     * to a file.
     *
     * @return 0 on success, 1 when the source query or the copy itself fails
     */
    Integer run(HplsqlParser.Copy_stmtContext ctx) {
        trace(ctx, "COPY");
        initOptions(ctx);
        StringBuilder sourceSql = new StringBuilder();
        String sourceConn;
        if (ctx.table_name() == null) {
            // The source is an arbitrary SELECT statement
            sourceSql.append(evalPop(ctx.select_stmt()).toString());
            sourceConn = exec.getStatementConnection();
            if (trace) {
                trace(ctx, "Statement:\n" + sourceSql);
            }
        } else {
            // The source is a table: read every column of it
            String tableName = evalPop(ctx.table_name()).toString();
            sourceConn = exec.getObjectConnection(ctx.table_name().getText());
            sourceSql.append("SELECT * FROM ").append(tableName);
        }
        Query query = exec.executeQuery(ctx, sourceSql.toString(), sourceConn);
        if (query.error()) {
            exec.signal(query);
            return 1;
        }
        exec.setSqlSuccess();
        int rc = 0;
        try {
            if (targetConn == null) {
                copyToFile(ctx, query);
            } else {
                copyToTable(ctx, query);
            }
        } catch (Exception e) {
            exec.signal(e);
            rc = 1;
        } finally {
            // The source query is released whether or not the copy succeeded
            exec.closeQuery(query, sourceConn);
        }
        return rc;
    }

    /**
     * Copy the query results to another table.
     *
     * Builds a parameterized INSERT for {@code sqlInsertName} with one
     * placeholder per source column and executes it on the target connection
     * for every source row, batching when {@code batchSize} is greater than 1.
     * The prepared statement is closed and the target connection is returned
     * even when a JDBC call fails.
     *
     * @param ctx   COPY statement context, used for trace/info messages
     * @param query executed source query; nothing is done when it has no result set
     * @throws Exception if reading the source or writing to the target fails
     */
    void copyToTable(HplsqlParser.Copy_stmtContext ctx, Query query) throws Exception {
        ResultSet rs = query.getResultSet();
        if (rs == null) {
            return;
        }
        ResultSetMetaData rm = rs.getMetaData();
        int cols = rm.getColumnCount();
        int rows = 0;
        if (trace) {
            trace(ctx, "SELECT executed: " + cols + " columns");
        }
        StringBuilder sql = new StringBuilder();
        sql.append("INSERT INTO ").append(sqlInsertName).append(" VALUES (");
        for (int i = 0; i < cols; i++) {
            sql.append("?");
            if (i + 1 < cols) {
                sql.append(",");
            }
        }
        sql.append(")");
        Connection conn = exec.getConnection(targetConn);
        try {
            PreparedStatement ps = conn.prepareStatement(sql.toString());
            try {
                long start = timer.start();
                long prev = start;
                boolean batchOpen = false;
                while (rs.next()) {
                    for (int i = 1; i <= cols; i++) {
                        ps.setObject(i, rs.getObject(i));
                    }
                    rows++;
                    if (batchSize > 1) {
                        ps.addBatch();
                        batchOpen = true;
                        if (rows % batchSize == 0) {
                            ps.executeBatch();
                            batchOpen = false;
                        }
                    } else {
                        ps.executeUpdate();
                    }
                    if (trace && rows % 100 == 0) {
                        long cur = timer.current();
                        // Progress is reported at most once every 10 seconds, so (cur - start) / 1000 is never 0 here
                        if (cur - prev > 10000) {
                            trace(ctx, "Copying rows: " + rows + " (" + rows / ((cur - start) / 1000) + " rows/sec)");
                            prev = cur;
                        }
                    }
                }
                // Flush the rows added since the last full batch
                if (batchOpen) {
                    ps.executeBatch();
                }
            } finally {
                ps.close();
            }
        } finally {
            exec.returnConnection(targetConn, conn);
        }
        exec.setRowCount(rows);
        long elapsed = timer.stop();
        if (info) {
            // elapsed is in milliseconds; clamp to 1 ms so a sub-millisecond copy cannot divide by zero
            long rowsPerSec = rows * 1000L / Math.max(elapsed, 1L);
            info(ctx, "COPY completed: " + rows + " row(s), " + timer.format() + ", " + rowsPerSec
                    + " rows/sec");
        }
    }

    /**
     * Copy the query results to a file.
     *
     * The target is a local file unless the HDFS keyword is present in the
     * statement. Columns are separated by {@code delimiter} and rows end with
     * a newline. With SQLINSERT each row is written as
     * {@code INSERT INTO <name> VALUES (...);} with values passed through
     * {@code Utils.quoteString} and nulls written as {@code NULL}; without it
     * a null column is written as an empty field.
     *
     * @param ctx   COPY statement context
     * @param query executed source query; nothing is done when it has no result set
     * @throws Exception if reading the source or writing the file fails
     */
    void copyToFile(HplsqlParser.Copy_stmtContext ctx, Query query) throws Exception {
        ResultSet rs = query.getResultSet();
        if (rs == null) {
            return;
        }
        ResultSetMetaData rm = rs.getMetaData();
        String filename = null;
        if (ctx.copy_target().expr() != null) {
            filename = evalPop(ctx.copy_target().expr()).toString();
        } else {
            filename = ctx.copy_target().getText();
        }
        byte[] del = delimiter.getBytes();
        byte[] rowdel = "\n".getBytes();
        byte[] nullstr = "NULL".getBytes();
        // Statement prefix written before every row in SQLINSERT mode; encoded once, not per row
        byte[] insertPrefix = null;
        if (sqlInsert) {
            insertPrefix = ("INSERT INTO " + sqlInsertName + " VALUES (").getBytes();
            rowdel = ");\n".getBytes();
        }
        int cols = rm.getColumnCount();
        int rows = 0;
        long bytes = 0;
        if (trace || info) {
            String mes = "Query executed: " + cols + " columns, output file: " + filename;
            if (trace) {
                trace(ctx, mes);
            } else {
                info(ctx, mes);
            }
        }
        java.io.File file = null;
        File hdfsFile = null;
        if (ctx.T_HDFS() == null) {
            file = new java.io.File(filename);
        } else {
            hdfsFile = new File();
        }
        OutputStream out = null;
        timer.start();
        try {
            if (file != null) {
                if (!file.exists()) {
                    file.createNewFile();
                }
                out = new FileOutputStream(file, false /*append*/);
            } else {
                out = hdfsFile.create(filename, true /*overwrite*/);
            }
            String col;
            while (rs.next()) {
                if (insertPrefix != null) {
                    out.write(insertPrefix);
                    bytes += insertPrefix.length;
                }
                for (int i = 1; i <= cols; i++) {
                    if (i > 1) {
                        out.write(del);
                        bytes += del.length;
                    }
                    col = rs.getString(i);
                    if (col != null) {
                        if (sqlInsert) {
                            col = Utils.quoteString(col);
                        }
                        byte[] b = col.getBytes();
                        out.write(b);
                        bytes += b.length;
                    } else if (sqlInsert) {
                        out.write(nullstr);
                        bytes += nullstr.length;
                    }
                }
                out.write(rowdel);
                bytes += rowdel.length;
                rows++;
            }
            exec.setRowCount(rows);
        } finally {
            if (out != null) {
                out.close();
            }
        }
        long elapsed = timer.stop();
        if (info) {
            // elapsed is in milliseconds; clamp to 1 ms so a sub-millisecond copy cannot divide by zero
            long rowsPerSec = rows * 1000L / Math.max(elapsed, 1L);
            info(ctx, "COPY completed: " + rows + " row(s), " + Utils.formatSizeInBytes(bytes) + ", "
                    + timer.format() + ", " + rowsPerSec + " rows/sec");
        }
    }

    /**
     * Run COPY FROM LOCAL statement.
     *
     * Collects every local file under the given sources (directories are
     * walked recursively) and copies each one to the target file system.
     * The host code is set to 0 only when the whole operation finished
     * without any failed file; otherwise it is set to 1 (or 2 when there was
     * nothing to copy).
     *
     * @return 2 when no source file was found, 1 when the copy was aborted by
     *         an I/O error, 0 otherwise (including when failures were skipped
     *         because of the IGNORE option)
     */
    public Integer runFromLocal(HplsqlParser.Copy_from_local_stmtContext ctx) {
        trace(ctx, "COPY FROM LOCAL");
        initFileOptions(ctx.copy_file_option());
        HashMap<String, Pair<String, Long>> srcFiles = new HashMap<String, Pair<String, Long>>();
        String src = evalPop(ctx.copy_source(0)).toString();
        String dest = evalPop(ctx.copy_target()).toString();
        int srcItems = ctx.copy_source().size();
        for (int i = 0; i < srcItems; i++) {
            createLocalFileList(srcFiles, evalPop(ctx.copy_source(i)).toString(), null);
        }
        if (info) {
            info(ctx, "Files to copy: " + srcFiles.size() + " (" + Utils.formatSizeInBytes(srcSizeInBytes) + ")");
        }
        if (srcFiles.isEmpty()) {
            exec.setHostCode(2);
            return 2;
        }
        timer.start();
        File file = new File();
        FileSystem fs = null;
        int succeed = 0;
        int failed = 0;
        long copiedSize = 0;
        // Set when an I/O error aborts the operation, so the finally block does not report success
        boolean aborted = false;
        try {
            fs = file.createFs();
            boolean multi = srcFiles.size() > 1;
            for (Map.Entry<String, Pair<String, Long>> i : srcFiles.entrySet()) {
                try {
                    Path s = new Path(i.getKey());
                    Path d = null;
                    if (multi) {
                        String relativePath = i.getValue().getLeft();
                        if (relativePath == null) {
                            d = new Path(dest, s.getName());
                        } else {
                            d = new Path(dest, relativePath + Path.SEPARATOR + s.getName());
                        }
                    } else {
                        // Path to file is specified (can be relative), so treat target as a file name (hadoop fs -put behavior)
                        if (srcItems == 1 && i.getKey().endsWith(src)) {
                            d = new Path(dest);
                        }
                        // Source directory is specified, so treat the target as a directory
                        else {
                            d = new Path(dest + Path.SEPARATOR + s.getName());
                        }
                    }
                    fs.copyFromLocalFile(delete, overwrite, s, d);
                    succeed++;
                    long size = i.getValue().getRight();
                    copiedSize += size;
                    if (info) {
                        info(ctx, "Copied: " + file.resolvePath(d) + " (" + Utils.formatSizeInBytes(size) + ")");
                    }
                } catch (IOException e) {
                    failed++;
                    if (!ignore) {
                        throw e;
                    }
                }
            }
        } catch (IOException e) {
            aborted = true;
            exec.signal(e);
            return 1;
        } finally {
            long elapsed = timer.stop();
            if (info) {
                info(ctx,
                        "COPY completed: " + succeed + " succeed, " + failed + " failed, " + timer.format() + ", "
                                + Utils.formatSizeInBytes(copiedSize) + ", "
                                + Utils.formatBytesPerSec(copiedSize, elapsed));
            }
            // An abort before any per-file failure was counted must still be reported as an error
            if (failed == 0 && !aborted) {
                exec.setHostCode(0);
            } else {
                exec.setHostCode(1);
            }
            file.close();
        }
        return 0;
    }

    /**
     * Create the list of local files for the specified path (including subdirectories).
     *
     * Every regular file found is added to {@code list} keyed by its absolute
     * path, with the pair (relative directory, size in bytes) as the value,
     * and its size is added to {@code srcSizeInBytes}. A path that does not
     * exist, or a directory whose content cannot be listed, contributes nothing.
     *
     * @param list         map that receives the files found
     * @param path         file or directory to scan
     * @param relativePath directory of {@code path} relative to the top-level
     *                     source, or null at the top level
     */
    void createLocalFileList(HashMap<String, Pair<String, Long>> list, String path, String relativePath) {
        java.io.File file = new java.io.File(path);
        if (!file.exists()) {
            return;
        }
        if (!file.isDirectory()) {
            long size = file.length();
            list.put(file.getAbsolutePath(), Pair.of(relativePath, size));
            srcSizeInBytes += size;
            return;
        }
        java.io.File[] children = file.listFiles();
        // listFiles() returns null when the directory cannot be read (I/O error, no permission)
        if (children == null) {
            return;
        }
        for (java.io.File i : children) {
            if (i.isDirectory()) {
                String rel = null;
                if (relativePath == null) {
                    rel = i.getName();
                } else {
                    rel = relativePath + java.io.File.separator + i.getName();
                }
                createLocalFileList(list, i.getAbsolutePath(), rel);
            } else {
                long size = i.length();
                list.put(i.getAbsolutePath(), Pair.of(relativePath, size));
                srcSizeInBytes += size;
            }
        }
    }

    /**
     * Reads the options of a COPY statement into this object's fields:
     * DELIMITER, SQLINSERT, AT (target connection) and BATCHSIZE.
     */
    void initOptions(HplsqlParser.Copy_stmtContext ctx) {
        for (HplsqlParser.Copy_optionContext opt : ctx.copy_option()) {
            if (opt.T_DELIMITER() != null) {
                // Escape sequences such as \t in the delimiter are converted to the actual characters
                String raw = evalPop(opt.expr()).toString();
                delimiter = StringEscapeUtils.unescapeJava(raw);
                continue;
            }
            if (opt.T_SQLINSERT() != null) {
                sqlInsert = true;
                delimiter = ", ";
                if (opt.ident() != null) {
                    sqlInsertName = opt.ident().getText();
                }
                continue;
            }
            if (opt.T_AT() != null) {
                targetConn = opt.ident().getText();
                // The copy target names the table to insert into on the target connection
                sqlInsertName = ctx.copy_target().expr() == null
                        ? ctx.copy_target().getText()
                        : evalPop(ctx.copy_target().expr()).toString();
                continue;
            }
            if (opt.T_BATCHSIZE() != null) {
                batchSize = evalPop(opt.expr()).intValue();
            }
        }
    }

    /**
     * Resets the source size counter and reads the COPY FROM LOCAL options
     * (OVERWRITE, DELETE, IGNORE) into the matching flags.
     */
    void initFileOptions(List<HplsqlParser.Copy_file_optionContext> options) {
        srcSizeInBytes = 0;
        for (HplsqlParser.Copy_file_optionContext opt : options) {
            if (opt.T_OVERWRITE() != null) {
                overwrite = true;
                continue;
            }
            if (opt.T_DELETE() != null) {
                delete = true;
                continue;
            }
            if (opt.T_IGNORE() != null) {
                ignore = true;
            }
        }
    }

    /**
     * Visits the given parse tree node and returns the value it left on the
     * interpreter stack, or {@code Var.Empty} when the stack is empty.
     */
    Var evalPop(ParserRuleContext ctx) {
        exec.visit(ctx);
        return exec.stack.isEmpty() ? Var.Empty : exec.stackPop();
    }

    /**
     * Writes a trace message by delegating to {@code exec.trace}.
     *
     * @param ctx     parse tree node the message relates to
     * @param message text to report
     */
    public void trace(ParserRuleContext ctx, String message) {
        exec.trace(ctx, message);
    }

    /**
     * Writes an informational message by delegating to {@code exec.info}.
     *
     * @param ctx     parse tree node the message relates to
     * @param message text to report
     */
    public void info(ParserRuleContext ctx, String message) {
        exec.info(ctx, message);
    }
}