edu.ku.brc.specify.conversion.MSULichensFixer.java Source code

Java tutorial

Introduction

Here is the source code for edu.ku.brc.specify.conversion.MSULichensFixer.java

Source

/* Copyright (C) 2015, University of Kansas Center for Research
 * 
 * Specify Software Project, specify@ku.edu, Biodiversity Institute,
 * 1345 Jayhawk Boulevard, Lawrence, Kansas, 66045, USA
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/
package edu.ku.brc.specify.conversion;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.UnsupportedEncodingException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Vector;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import edu.ku.brc.dbsupport.DataProviderFactory;
import edu.ku.brc.dbsupport.DataProviderSessionIFace;
import edu.ku.brc.specify.datamodel.Taxon;
import edu.ku.brc.specify.datamodel.TaxonTreeDef;
import edu.ku.brc.specify.datamodel.TaxonTreeDefItem;
import edu.ku.brc.specify.treeutils.NodeNumberer;
import edu.ku.brc.ui.ProgressFrame;
import edu.ku.brc.ui.UIRegistry;

/**
 * @author rods
 *
 * @code_status Alpha
 *
 * Aug 15, 2009
 *
 */
public class MSULichensFixer {
    protected static final Logger log = Logger.getLogger(GulfInvertsFixer.class);

    private static final String connStr = "jdbc:mysql://localhost/%s?characterEncoding=UTF-8&autoReconnect=true";

    protected static SimpleDateFormat dateTimeFormatter = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
    protected static SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd");
    protected static Timestamp now = new Timestamp(System.currentTimeMillis());
    protected static String nowStr = dateTimeFormatter.format(now);

    protected Connection oldDBConn;
    protected Connection newDBConn;
    protected String oldDBName;
    protected String newDBName;
    protected TableWriter tblWriter;

    protected static int taxonomicUnitTypeId = 11111111;

    protected IdMapperIndexIncrementerIFace indexIncremeter;
    protected GenericDBConversion conversion;
    protected ProgressFrame frame;

    protected String taxonomyTypeIdInClause = null;
    protected String taxonFromClause = null;

    protected Vector<CollectionInfo> collectionInfoList;
    protected HashMap<Integer, Vector<CollectionInfo>> collDispHash;

    protected HashMap<Integer, TaxonTreeDef> newTaxonInfoHash = new HashMap<Integer, TaxonTreeDef>();

    protected HashSet<Integer> taxonTypesInUse = new HashSet<Integer>();
    protected HashMap<Integer, TaxonTreeDef> taxonTreeDefHash = new HashMap<Integer, TaxonTreeDef>(); // Key is old TaxonTreeTypeID
    protected HashMap<Integer, Taxon> taxonTreeHash = new HashMap<Integer, Taxon>(); // Key is old TaxonTreeTypeID

    ///////////////////////////////////////////////////////////////////
    // for TaxonName Row Processing
    ///////////////////////////////////////////////////////////////////
    protected IdMapperIFace txMapper = null;
    protected IdMapperIFace txTypMapper = null;
    protected IdMapperIFace txUnitTypMapper = null;
    protected IdMapperIFace[] mappers = null;

    protected String[] oldCols = { "TaxonNameID", "ParentTaxonNameID", "TaxonomyTypeID", "AcceptedID",
            "TaxonomicUnitTypeID", "TaxonomicSerialNumber", "TaxonName", "UnitInd1", "UnitName1", "UnitInd2",
            "UnitName2", "UnitInd3", "UnitName3", "UnitInd4", "UnitName4", "FullTaxonName", "CommonName", "Author",
            "Source", "GroupPermittedToView", "EnvironmentalProtectionStatus", "Remarks", "NodeNumber",
            "HighestChildNodeNumber", "LastEditedBy", "Accepted", "RankID", "GroupNumber", "TimestampCreated",
            "TimestampModified" };

    protected String[] cols = { "TaxonID", "Author", "CitesStatus", "COLStatus", "CommonName", "CultivarName",
            "EnvironmentalProtectionStatus", "EsaStatus", "FullName", "GroupNumber", "GUID",
            "HighestChildNodeNumber", "IsAccepted", "IsHybrid", "IsisNumber", "LabelFormat", "Name",
            "NcbiTaxonNumber", "NodeNumber", "Number1", "Number2", "RankID", "Remarks", "Source",
            "TaxonomicSerialNumber", "Text1", "Text2", "UnitInd1", "UnitInd2", "UnitInd3", "UnitInd4", "UnitName1",
            "UnitName2", "UnitName3", "UnitName4", "UsfwsCode", "Visibility", "ParentID", "AcceptedID",
            "ModifiedByAgentID", "TaxonTreeDefItemID", "VisibilitySetByID", "CreatedByAgentID", "HybridParent1ID",
            "TaxonTreeDefID", "HybridParent2ID", "TimestampCreated", "TimestampModified", "Version" };

    protected int[] colTypes = null;
    protected int[] colSizes = null;

    protected Hashtable<String, String> newToOldColMap = new Hashtable<String, String>();
    protected Hashtable<String, String> oldToNewColMap = new Hashtable<String, String>();
    protected HashMap<String, Integer> fieldToColHash = new HashMap<String, Integer>();
    protected HashMap<Integer, String> colToFieldHash = new HashMap<Integer, String>();
    protected HashMap<String, Integer> oldFieldToColHash = new HashMap<String, Integer>();

    protected PreparedStatement pStmtTx = null;
    protected Statement stmtTx = null;

    protected int missingParentTaxonCount = 0;
    protected int lastEditedByInx;
    protected int modifiedByAgentInx;
    protected int rankIdOldDBInx;

    public MSULichensFixer(final String oldDBName, final String newDBName, final TableWriter tblWriter) {
        super();
        this.oldDBName = oldDBName;
        this.newDBName = newDBName;
        this.tblWriter = tblWriter;
    }

    /**
     * @param oldDBName
     * @param newDBName
     */
    public void doConnect() {

        try {
            oldDBConn = DriverManager.getConnection(String.format(connStr, oldDBName), "root", "root");
            newDBConn = DriverManager.getConnection(String.format(connStr, newDBName), "root", "root");

        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    public void shutdown() {
        try {
            oldDBConn.close();
            newDBConn.close();
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    private void convertTaxonRecords() {
        IdMapperMgr.getInstance().setDBs(oldDBConn, newDBConn);

        txMapper = IdMapperMgr.getInstance().addTableMapper("taxonname", "TaxonNameID", false);
        txTypMapper = IdMapperMgr.getInstance().addTableMapper("TaxonomyType", "TaxonomyTypeID", false);
        txUnitTypMapper = IdMapperMgr.getInstance().addTableMapper("TaxonomicUnitType", "TaxonomicUnitTypeID",
                false);
        mappers = new IdMapperIFace[] { txMapper, txMapper, txTypMapper, txMapper, txUnitTypMapper };

        newToOldColMap.put("TaxonID", "TaxonNameID");
        newToOldColMap.put("ParentID", "ParentTaxonNameID");
        newToOldColMap.put("TaxonTreeDefID", "TaxonomyTypeID");
        newToOldColMap.put("TaxonTreeDefItemID", "TaxonomicUnitTypeID");
        newToOldColMap.put("Name", "TaxonName");
        newToOldColMap.put("FullName", "FullTaxonName");
        newToOldColMap.put("IsAccepted", "Accepted");

        oldToNewColMap.put("TaxonNameID", "TaxonID");
        oldToNewColMap.put("ParentTaxonNameID", "ParentID");
        oldToNewColMap.put("TaxonomyTypeID", "TaxonTreeDefID");
        oldToNewColMap.put("TaxonomicUnitTypeID", "TaxonTreeDefItemID");
        oldToNewColMap.put("TaxonName", "Name");
        oldToNewColMap.put("FullTaxonName", "FullName");
        oldToNewColMap.put("Accepted", "IsAccepted");

        BasicSQLUtils.setDBConnection(newDBConn);

        StringBuilder newSB = new StringBuilder();
        StringBuilder vl = new StringBuilder();
        for (int i = 0; i < cols.length; i++) {
            fieldToColHash.put(cols[i], i + 1);
            colToFieldHash.put(i + 1, cols[i]);

            if (newSB.length() > 0)
                newSB.append(", ");
            newSB.append(cols[i]);

            if (vl.length() > 0)
                vl.append(',');
            vl.append('?');
        }

        StringBuilder oldSB = new StringBuilder();
        for (int i = 0; i < oldCols.length; i++) {
            oldFieldToColHash.put(oldCols[i], i + 1);
            if (oldSB.length() > 0)
                oldSB.append(", ");
            oldSB.append("ttx.");
            oldSB.append(oldCols[i]);
        }

        rankIdOldDBInx = oldFieldToColHash.get("RankID");

        String sqlStr = String.format("SELECT %s FROM taxon ", newSB.toString());
        log.debug(sqlStr);

        String fromClause = " FROM taxonname ttx LEFT JOIN msu_lichens.taxonname_TaxonNameID ON OldID = ttx.TaxonNameID LEFT JOIN msu_lichens_6.taxon AS ntx ON NewID = ntx.TaxonID WHERE ntx.TaxonID IS NULL";
        String sql = String.format("SELECT %s %s", oldSB.toString(), fromClause);
        log.debug(sql);

        String cntSQL = String.format("SELECT COUNT(*) %s", fromClause);
        log.debug(cntSQL);

        int txCnt = BasicSQLUtils.getCountAsInt(oldDBConn, cntSQL);
        if (frame != null) {
            frame.setProcess(0, txCnt);
        }

        log.debug(txCnt);

        String pStr = String.format("INSERT INTO taxon (%s) VALUES (%s)", newSB.toString(), vl.toString());
        log.debug(pStr);

        try {
            stmtTx = newDBConn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
            ResultSet rs1 = stmtTx.executeQuery(sqlStr);
            ResultSetMetaData rsmd1 = rs1.getMetaData();
            colTypes = new int[rsmd1.getColumnCount()];
            colSizes = new int[rsmd1.getColumnCount()];
            for (int i = 0; i < colTypes.length; i++) {
                colTypes[i] = rsmd1.getColumnType(i + 1);
                colSizes[i] = rsmd1.getPrecision(i + 1);
            }
            rs1.close();
            stmtTx.close();

            missingParentTaxonCount = 0;
            lastEditedByInx = oldFieldToColHash.get("LastEditedBy");
            modifiedByAgentInx = fieldToColHash.get("ModifiedByAgentID");
            stmtTx = oldDBConn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
            pStmtTx = newDBConn.prepareStatement(pStr);

            int cnt = 0;
            ResultSet rs = stmtTx.executeQuery(sql);
            ResultSetMetaData rsmd = rs.getMetaData();
            while (rs.next()) {
                processRow(rs, rsmd, null);

                cnt++;
                if (cnt % 1000 == 0) {
                    log.debug(cnt);
                    if (frame != null) {
                        frame.setProcess(cnt);
                    }
                }
            }
            rs.close();

            if (frame != null) {
                frame.setProcess(txCnt, txCnt);
            }

            String msg = String.format("Stranded Taxon (no parent): %d", missingParentTaxonCount);
            tblWriter.log(msg);
            log.debug(msg);

            if (missingParentTaxonCount > 0) {
                if (frame != null)
                    frame.setDesc("Renumbering the tree nodes, this may take a while...");

                HashSet<Integer> ttdHash = new HashSet<Integer>();
                for (CollectionInfo colInfo : CollectionInfo.getFilteredCollectionInfoList()) {
                    if (!ttdHash.contains(colInfo.getTaxonTreeDef().getId())) {
                        DataProviderSessionIFace session = null;
                        try {
                            session = DataProviderFactory.getInstance().createSession();

                            TaxonTreeDef taxonTreeDef = colInfo.getTaxonTreeDef();
                            taxonTreeDef = (TaxonTreeDef) session
                                    .getData("FROM TaxonTreeDef WHERE id = " + taxonTreeDef.getId());

                            sql = "SELECT TaxonID FROM taxon WHERE RankID = 0 AND TaxonTreeDefID = "
                                    + taxonTreeDef.getId();
                            log.debug(sql);
                            Integer txRootId = BasicSQLUtils.getCount(sql);
                            Taxon txRoot = (Taxon) session.getData("FROM Taxon WHERE id = " + txRootId);

                            NodeNumberer<Taxon, TaxonTreeDef, TaxonTreeDefItem> nodeNumberer = new NodeNumberer<Taxon, TaxonTreeDef, TaxonTreeDefItem>(
                                    txRoot.getDefinition());
                            nodeNumberer.doInBackground();

                        } catch (Exception ex) {
                            //session.rollback();
                            ex.printStackTrace();

                        } finally {
                            if (session != null) {
                                session.close();
                            }
                        }
                        ttdHash.add(colInfo.getTaxonTreeDef().getId());
                    }
                }
                if (frame != null)
                    frame.setDesc("Renumbering done.");
            }
            missingParentTaxonCount = 0;

        } catch (SQLException ex) {
            ex.printStackTrace();

        } finally {
            try {
                stmtTx.close();
                pStmtTx.close();
            } catch (Exception ex) {
            }
        }

        System.out.println("Done.");
    }

    /**
     * @param rs
     * @param rsmd
     * @param parentNodeId
     * @return
     * @throws SQLException
     */
    protected boolean processRow(final ResultSet rs, final ResultSetMetaData rsmd, final Integer parentNodeId)
            throws SQLException {
        for (int colInx = 1; colInx <= cols.length; colInx++) {
            pStmtTx.setNull(colInx, colTypes[colInx - 1]);
        }

        String newName = null;
        Boolean isRoot = null;
        boolean skip = false;
        for (int colInx = 1; colInx <= oldCols.length && !skip; colInx++) {
            String oldName = oldCols[colInx - 1];
            Integer newInx = fieldToColHash.get(oldName);
            if (newInx == null) {
                newName = oldToNewColMap.get(oldName);
                if (newName != null) {
                    newInx = fieldToColHash.get(newName);
                    if (newInx == -1) {
                        String msg = "Couldn't find column index for New Name[" + newName + "]";
                        log.error(msg);
                        tblWriter.logError(msg);
                    }
                } else if (colInx == lastEditedByInx) {
                    String lastEditedByStr = rs.getString(colInx);
                    if (StringUtils.isNotEmpty(lastEditedByStr)) {
                        Integer agtId = 1;//conversion.getModifiedByAgentId(lastEditedByStr);
                        if (agtId != null) {
                            pStmtTx.setInt(modifiedByAgentInx, agtId);
                            continue;
                        }
                    }

                    pStmtTx.setInt(colInx, 1);
                    continue;

                } else if (colInx != 20) {
                    String msg = "Couldn't find Old Name[" + oldName + "]";
                    log.error(msg);
                    tblWriter.logError(msg);
                } else {
                    continue; // GroupToView
                }
            }

            if (colInx < 6) {
                if (isRoot == null) {
                    isRoot = rs.getInt(rankIdOldDBInx) == 0;
                }
                Integer oldID = rs.getInt(colInx);
                if (!rs.wasNull() || (isRoot && colInx == 2)) {
                    boolean skipError = false;

                    Integer newID = null;
                    if (oldID == 612195491)
                        oldID = 21;
                    else if (oldID == -447245554)
                        oldID = -1414322196;

                    if (oldName.equals("TaxonomyTypeID")) {
                        newID = 1;

                    } else if (oldName.equals("TaxonomicUnitTypeID")) {
                        String s = "SELECT RankID FROM taxonomicunittype WHERE TaxonomicUnitTypeID = " + oldID;
                        Integer rankId = BasicSQLUtils.getCount(oldDBConn, s);
                        log.debug(s);
                        switch (rankId) {
                        case 0:
                            newID = 8;
                            break;
                        case 10:
                            newID = 12;
                            break;
                        case 20:
                            newID = 22;
                            break;
                        case 30:
                            newID = 23;
                            break;
                        case 40:
                            newID = 7;
                            break;
                        case 60:
                            newID = 14;
                            break;
                        case 70:
                            newID = 4;
                            break;
                        case 100:
                            newID = 17;
                            break;
                        case 110:
                            newID = 19;
                            break;
                        case 140:
                            newID = 15;
                            break;
                        case 150:
                            newID = 10;
                            break;
                        case 160:
                            newID = 16;
                            break;
                        case 170:
                            newID = 2;
                            break;
                        case 180:
                            newID = 5;
                            break;
                        case 190:
                            newID = 13;
                            break;
                        case 200:
                            newID = 1;
                            break;
                        case 210:
                            newID = 6;
                            break;
                        case 220:
                            newID = 21;
                            break;
                        case 230:
                            newID = 18;
                            break;
                        case 240:
                            newID = 11;
                            break;
                        case 250:
                            newID = 3;
                            break;
                        case 260:
                            newID = 20;
                            break;
                        case 270:
                            newID = 9;
                            break;
                        default:
                            log.error("Error with rank: " + rankId);
                        }

                    } else {
                        newID = mappers[colInx - 1].get(oldID);
                        if (newID == null && (colInx == 5 || colInx == 3)) {
                            String s = "SELECT RankID FROM taxonomicunittype WHERE TaxonomicUnitTypeID = " + oldID;
                            Integer rankId = BasicSQLUtils.getCount(oldDBConn, s);
                            log.debug(s);
                            if (rankId != null) {
                                s = "SELECT TaxonomicUnitTypeID FROM taxonomicunittype WHERE TaxonomyTypeID = 6 && RankID = "
                                        + rankId;
                                oldID = BasicSQLUtils.getCount(oldDBConn, s);
                                log.debug(s);
                                if (oldID != null) {
                                    newID = mappers[colInx - 1].get(oldID);
                                    if (newID == null) {
                                        log.error("newID is null for Old Id " + oldID + "   colInx: " + colInx);
                                    }
                                } else {
                                    log.error("oldID is null");
                                }
                            } else {
                                log.error("rankId is null");
                            }
                        }

                    }

                    if (newID == null) {
                        if (colInx == 3 || colInx == 5) {
                            if (!isRoot) {
                                skip = true;
                            }

                        } else if (colInx == 2 && (parentNodeId != null || isRoot)) {
                            // Note for RankID == 0 the parent would be null because it is the root
                            newID = parentNodeId;

                        } else {
                            boolean wasInOldTaxonTable = BasicSQLUtils.getCountAsInt(oldDBConn,
                                    "SELECT COUNT(*) FROM taxonname WHERE TaxonNameID = " + oldID) > 0;
                            boolean isDetPointToTaxon = BasicSQLUtils.getCountAsInt(oldDBConn,
                                    "SELECT COUNT(*) FROM determination WHERE TaxonNameID = " + oldID) > 0;
                            if (isDetPointToTaxon) {
                                String msg = String.format(
                                        "***** Couldn't get %s NewID [%d] from mapper for colInx[%d] In Old taxonname table: %s  WasParentID: %s  Det Using: %s",
                                        (colInx == 2 ? "Parent" : ""), oldID, colInx,
                                        (wasInOldTaxonTable ? "YES" : "no"), (colInx == 2 ? "YES" : "no"),
                                        (isDetPointToTaxon ? "YES" : "no"));
                                log.error(msg);
                                tblWriter.logError(msg);
                            }
                            skipError = true;
                            missingParentTaxonCount++;
                        }
                    }

                    if (!skip) {
                        if (newID != null) {
                            //System.out.println("newInx["+newInx+"]  newID["+newID+"] oldID["+oldID+"]");
                            pStmtTx.setInt(newInx, newID);

                        } else if (!skipError && !isRoot) {
                            String msg = "Unable to map old TaxonNameID[" + oldID + "]";
                            log.error(msg);
                            tblWriter.logError(msg);
                        }
                    }
                } else {
                    //log.error("***** Old ID Col ["+colInx+"] was null");
                    //skip = true;
                }
                continue;
            }

            switch (colTypes[newInx - 1]) {
            case java.sql.Types.BIT: {
                boolean val = rs.getBoolean(colInx);
                if (!rs.wasNull())
                    pStmtTx.setBoolean(newInx, val);
                break;
            }
            case java.sql.Types.INTEGER: {
                int val = rs.getInt(colInx);
                if (!rs.wasNull())
                    pStmtTx.setInt(newInx, val);

                //System.out.println("newInx["+colInx+"]  newID["+val+"]");
                break;
            }
            case java.sql.Types.SMALLINT: {
                short val = rs.getShort(colInx);
                if (!rs.wasNull())
                    pStmtTx.setShort(newInx, val);
                break;
            }
            case java.sql.Types.TIMESTAMP: {
                Timestamp val = rs.getTimestamp(colInx);
                //if (val == null && oldName.equals("Date"))
                //{
                //    pStmtTx.setTimestamp(newInx, null);
                //} else
                //{
                pStmtTx.setTimestamp(newInx, !rs.wasNull() ? val : null);
                //}
                break;
            }
            case java.sql.Types.LONGVARCHAR:
            case java.sql.Types.VARCHAR: {
                int len = colSizes[newInx - 1];
                String val = rs.getString(colInx);
                if (val != null && val.length() > len) {
                    newName = oldToNewColMap.get(oldName);
                    String msg = String.format("Concatinating field [%s] from length %d to %d String Lost:[%s]",
                            newName, val.length(), len, val.substring(len));
                    log.debug(msg);
                    tblWriter.logError(msg);

                    val = val.substring(0, len);
                }
                if (!rs.wasNull()) {
                    pStmtTx.setString(newInx, val);

                } else if (colInx == 7) {
                    pStmtTx.setString(newInx, "Empty");
                }
                break;
            }
            default:
                log.error("Didn't support SQL Type: " + rsmd.getColumnType(colInx));
                break;
            }

        }

        if (!skip) {
            if (parentNodeId != null) {
                int nxtId = BasicSQLUtils.getCountAsInt("SELECT TaxonID FROM taxon ORDER BY TaxonID DESC LIMIT 0,1")
                        + 1;
                pStmtTx.setInt(1, nxtId);
            }

            pStmtTx.setInt(fieldToColHash.get("Version"), 0);
            try {
                //System.out.println("----------------------------------------");
                pStmtTx.execute();

            } catch (Exception ex) {
                ex.printStackTrace();
                UIRegistry.showError(ex.toString());
            }
        }

        return true;
    }

    public static void main(String[] args) {
        String path = "msulichens.html";
        TableWriter writer;
        try {
            writer = new TableWriter(path, "MSU Lichens");
            MSULichensFixer msuf = new MSULichensFixer("msu_lichens", "msu_lichens_6", writer);
            msuf.doConnect();
            msuf.convertTaxonRecords();
            msuf.shutdown();

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
}