com.cloudera.hive.scd.SQLUpdater.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.hive.scd.SQLUpdater.java

Source

/*
 * Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */
package com.cloudera.hive.scd;

import com.google.common.collect.Lists;
import com.google.common.io.CharStreams;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.joda.time.format.ISODateTimeFormat;

import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Base class for performing DML updates on the content read in from a {@code RecordReader} via the
 * {@link com.cloudera.hive.scd.SCDRecordReader}. The updates should be contained in a file named
 * ".updates" that is located in the same directory as the regular input files.
 *
 * <p>The ".updates" file holds {@code UPDATE} and {@code DELETE FROM} statements, each terminated by
 * a semicolon and possibly spanning several lines. All statements must target the same table. Lines
 * of the form {@code -- time=<millis or ISO date>} tag the statements that follow them; statements
 * tagged with a time later than the job's {@code scd.time} setting (or the current time when that
 * setting is empty) are skipped.
 *
 * <p>Typical lifecycle: {@link #initialize(InputSplit, JobConf)}, then {@link #apply(Object, Object)}
 * once per record, then {@link #close()}.
 */
public abstract class SQLUpdater<K, V> {

    private static final Logger LOG = Logger.getLogger(SQLUpdater.class.getName());

    /** Prefix (matched case-insensitively) of a line that sets the SCD time of the following statements. */
    private static final String TIME_PREFIX = "-- time=";

    /** Encoding used to decode the ".updates" file, independent of the platform default. */
    private static final Charset UPDATES_CHARSET = Charset.forName("UTF-8");

    private Connection conn;
    private DMLHelper<K, V> helper;
    private List<PreparedStatement> updateStmts;
    /** Upper-cased name of the table targeted by the updates; {@code null} when there is nothing to apply. */
    private String tableName;

    public SQLUpdater() {
    }

    /**
     * Loads the update statements that accompany {@code split} and prepares them against an
     * in-memory H2 database. Does nothing if this updater already holds an open connection. When no
     * applicable statements are found the updater stays inactive and {@link #apply} returns
     * {@code false}.
     *
     * @param split the split being read; only {@link FileSplit}s are searched for an ".updates" file
     * @param jc the job configuration, consulted for {@code scd.time} and file system access
     * @throws IOException if the updates file cannot be read, or the helper/statements cannot be
     *         prepared against the database
     * @throws IllegalStateException if a statement is neither UPDATE nor DELETE, is truncated, or the
     *         statements reference more than one table
     * @throws RuntimeException if the H2 connection cannot be opened
     */
    public void initialize(InputSplit split, JobConf jc) throws IOException {
        if (conn != null) {
            return;
        }
        List<String> statements = loadUpdateStatements(split, jc);
        String table = null;
        for (String sql : statements) {
            String candidate = extractTableName(sql);
            if (table == null) {
                table = candidate;
            } else if (!table.equals(candidate)) {
                throw new IllegalStateException("Multiple table names in DDL: " + table + " and " + candidate);
            }
        }
        if (table == null) {
            return;
        }

        DMLHelper<K, V> dmlHelper = createDMLHelper(table, split, jc);
        Connection connection;
        try {
            connection = DriverManager.getConnection("jdbc:h2:mem:");
        } catch (SQLException e) {
            throw new RuntimeException("H2 database not found", e);
        }

        this.helper = dmlHelper;
        this.conn = connection;
        this.updateStmts = Lists.newArrayList();
        try {
            this.helper.initialize(conn);
            for (String sql : statements) {
                this.updateStmts.add(conn.prepareStatement(sql));
            }
        } catch (SQLException e) {
            // Release the connection and anything prepared so far; otherwise the next initialize()
            // call would see a non-null connection and return early on a half-built updater.
            close();
            throw new IOException("Could not execute DDL", e);
        }
        // Only mark the updater active once everything it needs has been set up.
        this.tableName = table;
    }

    /**
     * Creates the helper that moves records in and out of the database table.
     *
     * @param tableName the upper-cased table name taken from the update statements
     * @param split the split passed to {@link #initialize}
     * @param jc the job configuration passed to {@link #initialize}
     */
    protected abstract DMLHelper<K, V> createDMLHelper(String tableName, InputSplit split, JobConf jc);

    /**
     * Extracts the upper-cased target table from an {@code UPDATE <table> ...} or
     * {@code DELETE FROM <table> ...} statement, without any trailing semicolon.
     *
     * @throws IllegalStateException if the statement is of another kind or has no table token
     */
    private static String extractTableName(String sql) {
        String[] pieces = sql.trim().toUpperCase(Locale.ENGLISH).split("\\s+");
        int tableIndex;
        if ("UPDATE".equals(pieces[0])) {
            tableIndex = 1;
        } else if ("DELETE".equals(pieces[0])) {
            tableIndex = 2; // skips the FROM keyword
        } else {
            throw new IllegalStateException("Unsupported DML: " + sql);
        }
        if (pieces.length <= tableIndex) {
            throw new IllegalStateException("Unsupported DML: " + sql);
        }
        String table = pieces[tableIndex];
        // "DELETE FROM foo;" leaves the terminator glued to the table token.
        if (table.endsWith(";")) {
            table = table.substring(0, table.length() - 1);
        }
        if (table.isEmpty()) {
            throw new IllegalStateException("Unsupported DML: " + sql);
        }
        return table;
    }

    /**
     * Parses an SCD time given either as epoch milliseconds or as an ISO-8601 date with optional time.
     *
     * @param text the text to parse; surrounding whitespace is ignored
     * @param defaultValue returned when {@code text} is null or blank
     * @throws IllegalArgumentException if the text is neither a long nor a parseable ISO date
     */
    private long asSCDTime(String text, long defaultValue) {
        String trimmed = (text == null) ? "" : text.trim();
        if (trimmed.isEmpty()) {
            return defaultValue;
        }
        try {
            return Long.parseLong(trimmed);
        } catch (NumberFormatException e) {
            return ISODateTimeFormat.dateOptionalTimeParser().parseMillis(trimmed);
        }
    }

    /**
     * Reads the applicable statements from the ".updates" file that sits next to the split's file.
     *
     * @return the statements in file order; empty if the split is not a {@link FileSplit} or the
     *         file does not exist
     */
    private List<String> loadUpdateStatements(InputSplit split, JobConf jc) throws IOException {
        long currentSCDTime = asSCDTime(jc.get("scd.time", ""), System.currentTimeMillis());
        List<String> stmts = Lists.newArrayList();
        if (split instanceof FileSplit) {
            Path base = ((FileSplit) split).getPath();
            FileSystem fs = base.getFileSystem(jc);
            Path updates = new Path(base.getParent(), ".updates");
            if (fs.exists(updates)) {
                stmts.addAll(readLines(fs, updates, currentSCDTime));
            }
        }
        return stmts;
    }

    /**
     * Parses the updates file into complete, semicolon-terminated statements.
     *
     * <p>Everything from {@code --} to the end of a line is treated as a comment. NOTE(review): this
     * also truncates a line at a {@code --} that appears inside a SQL string literal.
     *
     * @param rootScdTime statements tagged with a later time are skipped; also used as the time for a
     *        {@code -- time=} marker with an empty value
     * @throws IllegalStateException if the file ends in the middle of a statement
     */
    private List<String> readLines(FileSystem fs, Path path, long rootScdTime) throws IOException {
        List<String> rawLines;
        InputStreamReader reader = new InputStreamReader(fs.open(path), UPDATES_CHARSET);
        try {
            rawLines = CharStreams.readLines(reader);
        } finally {
            // CharStreams.readLines(Readable) leaves the reader open; close it (and the stream) here.
            reader.close();
        }

        List<String> lines = Lists.newArrayList();
        long currentScdTime = 0L; // statements ahead of the first time marker always apply
        StringBuilder workingLine = null;
        for (String rawLine : rawLines) {
            String line = rawLine.trim();
            if (line.toLowerCase(Locale.ENGLISH).startsWith(TIME_PREFIX)) {
                currentScdTime = asSCDTime(line.substring(TIME_PREFIX.length()), rootScdTime);
                continue;
            }
            if (currentScdTime > rootScdTime) {
                continue; // this section of the file is dated after the requested SCD time
            }
            // Prune out comments/whitespace
            int commentIndex = line.indexOf("--");
            if (commentIndex >= 0) {
                // Re-trim so that "stmt; -- note" is still recognised as ending with ';'.
                line = line.substring(0, commentIndex).trim();
            }
            if (line.isEmpty()) {
                continue;
            }
            if (!line.endsWith(";")) {
                if (workingLine == null) {
                    workingLine = new StringBuilder();
                }
                workingLine.append(line).append(' ');
            } else if (workingLine != null) {
                workingLine.append(line);
                lines.add(workingLine.toString());
                workingLine = null; // working line is completed.
            } else { // single-line statement
                lines.add(line);
            }
        }
        if (workingLine != null) {
            throw new IllegalStateException("Incomplete SQL in updates: " + workingLine.toString());
        }
        return lines;
    }

    /**
     * Runs the prepared update statements against a single record: the record is handed to the
     * helper for insertion, every statement is executed, and the helper reads the result back.
     *
     * @param currentKey the record key, passed to the helper
     * @param currentValue the record value, passed to the helper
     * @return {@code false} if no updates are configured; otherwise the value reported by the
     *         helper's {@code retrieveResults}. If a SQL error occurs it is logged and {@code true}
     *         is returned.
     */
    public boolean apply(K currentKey, V currentValue) {
        if (tableName == null) {
            return false;
        }
        try {
            helper.insertValues(currentKey, currentValue);
            for (PreparedStatement ps : updateStmts) {
                ps.execute();
            }
            return helper.retrieveResults(currentKey, currentValue);
        } catch (SQLException e) {
            LOG.log(Level.WARNING, "Could not apply updates to table " + tableName, e);
            return true;
        }
    }

    /**
     * Releases the helper, the prepared statements and the database connection. Each resource is
     * closed independently so that one failure does not leak the others; failures are logged, not
     * thrown. Afterwards the updater is inactive and may be initialized again.
     */
    public void close() {
        try {
            closeHelper();
        } catch (SQLException e) {
            LOG.log(Level.WARNING, "Could not close DML helper", e);
        }
        if (updateStmts != null) {
            for (PreparedStatement ps : updateStmts) {
                try {
                    ps.close();
                } catch (SQLException e) {
                    LOG.log(Level.WARNING, "Could not close update statement", e);
                }
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                LOG.log(Level.WARNING, "Could not close H2 connection", e);
            }
        }
        helper = null;
        updateStmts = null;
        tableName = null;
        conn = null;
    }

    /** Closes the helper if one exists. */
    private void closeHelper() throws SQLException {
        if (helper != null) {
            helper.close();
        }
    }
}