org.mskcc.cbio.portal.dao.MySQLbulkLoader.java Source code

Java tutorial

Introduction

Here is the source code for org.mskcc.cbio.portal.dao.MySQLbulkLoader.java

Source

/*
 * Copyright (c) 2015 Memorial Sloan-Kettering Cancer Center.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
 * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
 * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
 * obligations to provide maintenance, support, updates, enhancements or
 * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be
 * liable to any party for direct, indirect, special, incidental or
 * consequential damages, including lost profits, arising out of the use of this
 * software and its documentation, even if Memorial Sloan-Kettering Cancer
 * Center has been advised of the possibility of such damage.
 */

/*
 * This file is part of cBioPortal.
 *
 * cBioPortal is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package org.mskcc.cbio.portal.dao;

import org.mskcc.cbio.portal.util.*;
import org.apache.commons.io.FileUtils;

import java.io.*;
import java.sql.*;
import java.util.*;

/**
 * To speed up CGDS data loading, bulk load from files using MySQL "LOAD DATA INFILE" functionality.
 * Intercept each record write in the normal load, buffer it in a temp file, and load the temp file when done.
 * NOT thread-safe.
 * @author arthur goldberg
 * In the future, would be cooler to implement this by overloading the JDBC Connection.prepareStatement
 * and PreparedStatement.setX() calls.
 */
public class MySQLbulkLoader {
    private static boolean bulkLoad = false;

    private static final Map<String, MySQLbulkLoader> mySQLbulkLoaders = new HashMap<String, MySQLbulkLoader>();

    /**
     * Get a MySQLbulkLoader
     * @param dbName database name
     * @return 
     */
    public static MySQLbulkLoader getMySQLbulkLoader(String dbName) {
        MySQLbulkLoader mySQLbulkLoader = mySQLbulkLoaders.get(dbName);
        if (mySQLbulkLoader == null) {
            mySQLbulkLoader = new MySQLbulkLoader(dbName);
            mySQLbulkLoaders.put(dbName, mySQLbulkLoader);
        }
        return mySQLbulkLoader;
    }

    /**
     * Flushes all pending data from the bulk writer. Temporarily disables referential
     * integrity while it does so, largely because MySQL uses a weird model and expects
     * referential integrity at each record, not just at the end of the transaction.
     * @return the number of rows added
     * @throws DaoException
     */
    public static int flushAll() throws DaoException {
        int checks = 0;
        PreparedStatement stmt = null;
        boolean executedSetFKChecks = false;
        try {
            Connection con = JdbcUtil.getDbConnection(MySQLbulkLoader.class);
            stmt = con.prepareStatement("SELECT @@foreign_key_checks;");
            ResultSet result = stmt.executeQuery();

            result.first();
            checks = result.getInt(1);

            stmt = con.prepareStatement("SET foreign_key_checks = ?;");
            stmt.setLong(1, 0);
            stmt.execute();
            executedSetFKChecks = true;

            int n = 0;
            for (MySQLbulkLoader mySQLbulkLoader : mySQLbulkLoaders.values()) {
                n += mySQLbulkLoader.loadDataFromTempFileIntoDBMS();
            }

            return n;
        } catch (IOException e) {
            System.err.println("Could not open temp file");
            e.printStackTrace();
            return -1;
        } catch (SQLException e) {
            throw new DaoException(e);
        } finally {
            mySQLbulkLoaders.clear();
            if (executedSetFKChecks && stmt != null) {
                try {
                    stmt.setLong(1, checks);
                    stmt.execute();
                } catch (SQLException e) {
                    throw new DaoException(e);
                }

            }
        }
    }

    private String tempFileName = null;
    private File tempFileHandle = null;
    private BufferedWriter tempFileWriter = null;
    private String tableName;
    private final String tempTableSuffix = ".tempTable";
    private int rows;
    // TODO: make configurable
    private static final long numDebuggingRowsToPrint = 0;

    private MySQLbulkLoader(String tableName) {
        try {
            openTempFile(tableName);
            this.tableName = tableName;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Open temp file for table 'tableName'.
     * note that auto_increment fields must be handled specially.
     * @param tableName
     * @throws FileNotFoundException
     * @throws IOException
     * @throws IllegalArgumentException
     */
    private void openTempFile(String tableName) throws IOException {

        tempFileHandle = File.createTempFile(tableName, tempTableSuffix, FileUtils.getTempDirectory());

        tempFileName = tempFileHandle.getAbsolutePath();

        if (!tempFileHandle.exists()) {
            throw new FileNotFoundException("File does not exist: " + tempFileHandle);
        }
        if (!tempFileHandle.isFile()) {
            throw new IllegalArgumentException("Should not be a directory: " + tempFileHandle);
        }
        if (!tempFileHandle.canWrite()) {
            throw new IllegalArgumentException("File cannot be written: " + tempFileHandle);
        }

        // to improve performance use buffering; FileWriter always assumes default encoding is OK!
        this.tempFileWriter = new BufferedWriter(new FileWriter(tempFileHandle, false));
    }

    /**
     * write a record's fields, in order, to the table's temp file. if no fields are provided, writes no record.
     * fields are TAB separated. 
     * @param fieldValues
     */
    public void insertRecord(String... fieldValues) {
        if (fieldValues.length == 0) {
            return;
        }
        try {
            tempFileWriter.write(escapeValue(fieldValues[0]));
            for (int i = 1; i < fieldValues.length; i++) {
                tempFileWriter.write("\t");
                tempFileWriter.write(escapeValue(fieldValues[i]));
            }
            tempFileWriter.newLine();

            if (rows++ < numDebuggingRowsToPrint) {
                StringBuffer sb = new StringBuffer(escapeValue(fieldValues[0]));
                for (int i = 1; i < fieldValues.length; i++) {
                    sb.append("\t").append(escapeValue(fieldValues[i]));
                }
                System.err.println("MySQLbulkLoader: Wrote " + sb.toString() + " to '" + tempFileName + "'.");
            }
        } catch (IOException e) {
            System.err.println("Unable to write to temp file.\n");
            e.printStackTrace();
        }
    }

    private String escapeValue(String value) {
        if (value == null) {
            return "\\N";
        }

        return value.replace("\r", "").replaceAll("\n", "\\\\n").replace("\t", "\\t");
    }

    /**
     * load the temp file maintained by the MySQLbulkLoader into the DMBS.
     * truncates the temp file, and leaves it open for more insertRecord() operations.
     * returns number of records inserted.
     * 
     * TODO: perhaps instead of having each program that uses a DAO that uses bulk loading call 'completeInsert', 
     * get MySQLbulkLoader created by a factory, and have the factory remember to load all the tables from all
     * the temp files before the program exits. 
     * @return number of records inserted
     * @throws DaoException
     * @throws IOException 
     */
    private int loadDataFromTempFileIntoDBMS() throws DaoException, IOException {
        Connection con = null;
        Statement stmt;

        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            try {
                // close the file, flushing all buffers before loading the DBMS
                tempFileWriter.flush();
                tempFileWriter.close();
            } catch (IOException e) {
                throw new DaoException(e);
            }

            con = JdbcUtil.getDbConnection(MySQLbulkLoader.class);
            stmt = con.createStatement();

            String command = "LOAD DATA LOCAL INFILE '" + tempFileName + "'" + " INTO TABLE " + tableName;
            stmt.execute(command);

            int updateCount = stmt.getUpdateCount();
            ProgressMonitor
                    .setCurrentMessage(" --> records inserted into `" + tableName + "` table: " + updateCount);
            int nLines = FileUtil.getNumLines(tempFileHandle);
            if (nLines != updateCount) {
                String otherDetails = "";
                if (stmt.getWarnings() != null) {
                    otherDetails = "More error/warning details: " + stmt.getWarnings().getMessage();
                }
                throw new DaoException("DB Error: only " + updateCount + " of the " + nLines
                        + " records were inserted in `" + tableName + "`. " + otherDetails);

            } else {
                tempFileHandle.delete();
            }

            // reopen empty temp file -- not necessary, this loader will be removed.
            //this.tempFileWriter = new BufferedWriter(new FileWriter( this.tempFileHandle, false));

            return updateCount;

        } catch (SQLException e) {
            throw new DaoException(e);
        } finally {
            JdbcUtil.closeAll(MySQLbulkLoader.class, con, pstmt, rs);
        }
    }

    public String getTempFileName() {
        return tempFileName;
    }

    public String getTableName() {
        return tableName;
    }

    public static boolean isBulkLoad() {
        return bulkLoad;
    }

    public static void bulkLoadOn() {
        MySQLbulkLoader.bulkLoad = true;
    }

    public static void bulkLoadOff() {
        MySQLbulkLoader.bulkLoad = false;
    }

}