edu.ku.brc.specify.tools.webportal.BuildSearchIndex2.java Source code

Introduction

Here is the source code for edu.ku.brc.specify.tools.webportal.BuildSearchIndex2.java
Source

/* This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/**
 * 
 */
package edu.ku.brc.specify.tools.webportal;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import net.sf.json.JSONObject;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import edu.ku.brc.af.core.SchemaI18NService;
import edu.ku.brc.af.core.UsageTracker;
import edu.ku.brc.af.core.db.DBFieldInfo;
import edu.ku.brc.af.core.db.DBTableIdMgr;
import edu.ku.brc.af.core.db.DBTableInfo;
import edu.ku.brc.af.ui.forms.formatters.UIFieldFormatterIFace;
import edu.ku.brc.dbsupport.DBConnection;
import edu.ku.brc.specify.conversion.BasicSQLUtils;
import edu.ku.brc.specify.datamodel.CollectionObject;
import edu.ku.brc.specify.datamodel.SpExportSchemaMapping;
import edu.ku.brc.specify.tasks.subpane.qb.QBDataSourceListenerIFace;

/**
 * @author rod
 *
 * @code_status Alpha
 *
 * Created Date: Feb 2, 2012
 * 
 * Builds lucene indexes, and sets up configuration files for solr and the web portal application.
 * (Currently accessed via Export tool.)
 *
 */
public class BuildSearchIndex2 {

    private final String collectionName;
    private final String attachmentURL;
    private final String outputFileName;
    private final Path tempDir;
    private Connection dbConn = null;
    private Connection dbConn2 = null;
    private Connection dbConn3 = null;

    //-------------------------------
    // Lucene Indexing
    //-------------------------------

    protected int INDEX_DIR = 0;
    protected int INDEX_DIR_COL = 1;
    protected int INDEX_DIR_GEO = 2;
    protected int INDEX_DIR_LOC = 3;
    protected int INDEX_DIR_TXN = 4;

    //protected String[]      fileNames  = new String[] {"index-specify-kuherps"};
    protected String[] fileNames;
    protected File[] files;
    protected Analyzer[] analyzers;
    protected IndexReader[] readers;

    //protected Searcher     searcher;
    protected String dbName;

    protected final SpExportSchemaMapping mapping;
    protected final String writeToDir;

    protected String fieldsXml; //the "fields" block for the solr schema.xml file.

    protected final String[][] systemFlds = { { "spid", "string", "true", "true", "true" },
            { "cs", "string", "true", "false", "true" }, { "contents", "string", "false", "true", "true" },
            { "img", "string", "true", "true", "false" }, { "geoc", "string", "true", "true", "false" } };

    public BuildSearchIndex2(SpExportSchemaMapping mapping, String outputFileName, String collectionName,
            String attachmentURL) {
        super();
        //XXX need to get schemamapping object, probably
        this.mapping = mapping;
        this.outputFileName = outputFileName;
        this.collectionName = collectionName;
        this.attachmentURL = attachmentURL;
        try {
            tempDir = Files.createTempDirectory("portal_export");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        this.writeToDir = tempDir.toString() + File.separator + "PortalFiles";

        String solrIdxDir = this.writeToDir + File.separator + "solr";
        fileNames = new String[] { solrIdxDir };
        files = new File[fileNames.length];
        analyzers = new Analyzer[fileNames.length];
        readers = new IndexReader[fileNames.length];
    }

    /**
     * @param server
     * @param port
     * @param dbName
     * @param username
     * @param pwd
     */
    public void createDBConnection(final String server, final String port, final String dbName,
            final String username, final String pwd) throws SQLException {
        disConnect();

        this.dbName = dbName;

        String connStr = "jdbc:mysql://%s:%s/%s?characterEncoding=UTF-8&autoReconnect=true";
        try {
            dbConn = DriverManager.getConnection(String.format(connStr, server, port, dbName), username, pwd);
            dbConn2 = DriverManager.getConnection(String.format(connStr, server, port, dbName), username, pwd);
            dbConn3 = DriverManager.getConnection(String.format(connStr, server, port, dbName), username, pwd);

        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    public void connect() throws SQLException {
        disConnect();
        dbConn = DBConnection.getInstance().createConnection();
        dbConn2 = DBConnection.getInstance().createConnection();
        dbConn3 = DBConnection.getInstance().createConnection();
    }

    protected void disConnect() throws SQLException {
        disConnect(dbConn);
        disConnect(dbConn2);
        disConnect(dbConn3);
    }

    protected void disConnect(Connection con) throws SQLException {
        if (con != null) {
            if (!con.isClosed()) {
                con.close();
            }
            con = null;
        }
    }

    /**
     * 
     */
    public void cleanup() throws SQLException {
        disConnect();
    }

    /**
     * @return the dbConn
     */
    public Connection getDBConn() {
        return dbConn;
    }

    private String createQuery(String tblName) {
        /*String sql = "SELECT si.FieldName, qf.FieldName, qf.ColumnAlias, qf.TableList, qf.StringId, qf.FormatName, qf.ContextTableIdent, qf.`Position` FROM spexportschema AS s " +
                    "Inner Join spexportschemaitem AS si ON s.SpExportSchemaID = si.SpExportSchemaID " +
                    "Inner Join spexportschemaitemmapping AS smi ON si.SpExportSchemaItemID = smi.ExportSchemaItemID " +
                    "Inner Join spqueryfield AS qf ON smi.SpQueryFieldID = qf.SpQueryFieldID  WHERE " +
                    "ORDER BY  qf.ContextTableIdent ASC, qf.`Position` ASC";
            
        Statement stmt = null;
        try
        {
            
        } catch (Exception ex)
        {
            
        }*/
        //                                             
        String sqlStr = "SELECT * from " + tblName;
        return sqlStr;
    }

    private String getAttachments(Connection conn, String baseTblName, Integer baseKey,
            boolean includeRelatedAttachments) throws SQLException {
        if (!"collectionobject".equals(baseTblName)) {
            throw new NotImplementedException("unsupported base table");
        }
        if (includeRelatedAttachments) {
            throw new NotImplementedException("includeRelatedAttachments is not implemented");
        }
        return getAttachments(conn, baseTblName, baseKey);
    }

    private String getAttachments(Connection conn, String baseTblName, Integer baseKey) throws SQLException {
        String attacherTbl = baseTblName + "attachment";
        String baseTblID = baseTblName + "ID"; //XXX dude... use DBTableMgr stuff...
        String sql = "select att.AttachmentID, att.AttachmentLocation, att.Title, aia.Height, aia.Width from attachment att "
                + "left join attachmentimageattribute aia on aia.AttachmentImageAttributeID "
                + "= att.AttachmentImageAttributeID inner join " + attacherTbl + " oatt on oatt.AttachmentID "
                + "= att.AttachmentID where att.IsPublic and att.MimeType like 'image/%' and oatt." + baseTblID
                + " = " + baseKey;
        Statement stmt = null;
        ResultSet rs = null;
        String result = null;
        try {
            stmt = conn.createStatement();
            rs = stmt.executeQuery(sql);
            while (rs.next()) {
                if (result == null) {
                    result = "[";
                } else {
                    result += ", ";
                }
                String[] flds = { "AttachmentID,no", "AttachmentLocation,yes", "Title,yes", "Height,no",
                        "Width,no" };
                String rec = "";
                for (String fld : flds) {
                    String[] def = fld.split(",");
                    boolean quote = "yes".equals(def[1]);
                    String json = jsonFldVal(rs.getObject(def[0]), quote, def[0]);
                    if (json.length() > 0) {
                        if (rec.length() > 0) {
                            rec += ",";
                        }
                        rec += json;
                    }
                }
                if (rec.length() > 0) {
                    result += "{" + rec + "}";
                }
            }
            if (result != null && result.length() > 0) {
                result += "]";
            }
        } finally {
            if (stmt != null) {
                stmt.close();
            }
            if (rs != null) {
                rs.close();
            }
        }
        return result;
    }

    private String jsonFldVal(Object val, boolean quote, String name) {
        String result = "";
        if (val != null) {
            String q = quote ? "\"" : "";
            result = name + ":" + q + escape4Json(String.valueOf(val)) + q;
        }
        return result;
    }

    private String escape4Json(String str) {
        //escapes quotes,\, /, \r, \n, \b, \f, \t
        String[] badJson = { "\\", "\"", "\r", "\n", "\b", "\f", "\t" };
        String result = str;
        for (String bad : badJson) {
            result = result.replace(bad, "\\" + bad);
        }
        return result;
    }

    /**
     * @param columnIdx
     * @return
     */
    private boolean includeColumn(int columnIdx) {
        return true;
    }

    /**
     * @param inStr
     * @param len
     * @param used
     * @return an un-used abbreviation for str, hopefully of length <= len. 
     * 
     * If necessary an abbreviation of length (slightly) > len will be returned.
     * Result is not added to used. 
     * 
     */
    private String getAbbreviation(String inStr, int len, List<String> used) {
        String[] bad = { "", ",", "", "\\", "{", "}", "[", "]", ";", ":", "\"", "'", "!", "@", "#", "$", "%",
                "^", "&", "*", "(", ")", "+", "=", "|", "/", "?", "<", ">", "~", "`", "." };
        String str = inStr;
        for (String b : bad) {
            str = str.replace(b, "_");
        }
        str = str.replace(" ", "");
        String abbr = str.substring(0, 1).toLowerCase();
        int c = 1;
        int lastUsed = 1;
        while (c < str.length()) {
            String s = str.substring(c, c + 1);
            if (s.equals(s.toUpperCase())) {
                abbr += s.toLowerCase();
                //lastUsed = c;
            }
            c++;
        }
        if (abbr.length() < len) {
            c = lastUsed;
            while (abbr.length() < len) {
                if (c < str.length()) {
                    abbr += str.substring(c, c + 1).toLowerCase();
                } else {
                    abbr += "x";
                }
            }
        }
        String result = abbr.length() > len ? abbr.substring(0, len) : abbr;
        if (used.indexOf(result) != -1) {
            int l = result.length() + 1;
            while (l < abbr.length()) {
                result = abbr.substring(0, l);
                if (used.indexOf(result) == -1) {
                    l = abbr.length();
                } else {
                    l++;
                }
            }
            int numb = 1;
            l = result.length();
            while (used.indexOf(result) != -1) {
                result = result.substring(0, l) + numb++;
            }
        }
        return result;
    }

    private Map<Integer, String> getShortNamesForFields(ExportMappingHelper map) {
        Map<Integer, String> result = new HashMap<Integer, String>();
        ArrayList<String> used = new ArrayList<String>();
        for (String[] sys : systemFlds) {
            used.add(sys[0]);
        }
        for (ExportMappingInfo mapping : map.getMappings()) {
            //XXX need to watch out that the QueryField.position-columnIndex relationship maintained --- is not messed up by un-mapped conditions, etc
            String abbr = getAbbreviation(mapping.getSpFldName(), 2, used);
            used.add(abbr);
            result.put(mapping.getColIdx(), abbr);
        }
        return result;
    }

    private String getFldXmlForSolr(String name, String type, boolean indexed, boolean stored, boolean required) {
        String idxStr = indexed ? "true" : "false";
        String storedStr = stored ? "true" : "false";
        String reqStr = required ? "true" : "false";
        return "<field name=\"" + name + "\"" + " type=\"" + type + "\"" + " indexed=\"" + idxStr + "\""
                + " stored=\"" + storedStr + "\"" + " required=\"" + reqStr + "\"" + "/>";
    }

    private String getSolrFldType(ExportMappingInfo info) {
        DBFieldInfo fld = info.getFldInfo();
        if (fld != null) {
            if (info.isRecordTyper()) {
                return "string";
            }
            if (fld.getTableInfo().getTableId() == CollectionObject.getClassTableId()
                    && fld.getName().equalsIgnoreCase("catalognumber")) {
                //XXX ouch. What happens if there are multiple collections in a single portal, some with numeric catnums and some string???
                UIFieldFormatterIFace formatter = fld.getFormatter();
                if (formatter.isNumeric()) {
                    return "int";
                } else {
                    return "string";
                }
            }
            String type = fld.getType();
            if (StringUtils.isNotEmpty(type)) {
                if (type.equals("java.lang.String")) {
                    return "string";
                } else if (type.equals("java.util.Calendar")) {
                    String fldId = info.getFldId();
                    if (fldId.endsWith("NumericDay") || fldId.endsWith("NumericMonth")
                            || fldId.endsWith("NumericYear")) {
                        return "int";
                    } else {
                        return "string";//XXX need to ALWAYS format dates in YYYY-MM-DD (plus time?) format???
                    }
                } else if (type.equals("java.lang.Float")) {
                    return "tfloat";
                } else if (type.equals("text")) {
                    return "string";
                } else if (type.equals("java.sql.Timestamp")) {
                    return "string";
                } //XXX format???
                else if (type.equals("java.math.BigDecimal")) {
                    return "tdouble";
                } //XXX maybe?  
                else if (type.equals("java.lang.Integer")) {
                    return "tint";
                } else if (type.equals("java.lang.Boolean")) {
                    return /*"boolean"; there seems to be no method in lucene.Document to add boolean fields*/ "string";
                } else if (type.equals("java.lang.Byte")) {
                    return "tint";
                } else if (type.equals("java.lang.Double")) {
                    return "tdouble";
                } else if (type.equals("java.lang.Short")) {
                    return "tint";
                } else if (type.equals("java.util.Date")) {
                    return "string";
                } //XXX format???
                else if (type.equals("java.lang.Long")) {
                    return "tlong";
                }
            }
        }
        //else the fld is formatted or aggregated
        return "string";
    }

    /*private String getSolrFldTypeForSchema(DBFieldInfo  fld)
    {
       if (fld != null)
       {
        if (fld.getTableInfo().getTableId() == CollectionObject.getClassTableId() && fld.getName().equalsIgnoreCase("catalognumber"))
        {
           //XXX ouch. What happens if there are multiple collections in a single portal, some with numeric catnums and some string???
           UIFieldFormatterIFace formatter = fld.getFormatter();
           if (formatter.isNumeric())
           {
              return "
           }
        }
      String type = fld.getType();
        if (StringUtils.isNotEmpty(type))
        {
           if (type.equals("java.lang.String")) {return "string";}
           else if (type.equals("java.util.Calendar")) {return "string";}//XXX need to ALWAYS format dates in YYYY-MM-DD (plus time?) format???
           else if (type.equals("java.lang.Float")) {return "string";}
           else if (type.equals("text")) {return "string";}
           else if (type.equals("java.sql.Timestamp")) {return "string";}//XXX format???
           else if (type.equals("java.math.BigDecimal")) {return "string";} //XXX maybe?  
           else if (type.equals("java.lang.Integer")) {return "string";}
           else if (type.equals("java.lang.Boolean")) {return "string";}
           else if (type.equals("java.lang.Byte")) {return "string";}
           else if (type.equals("java.lang.Double")) {return "tdouble";}
           else if (type.equals("java.lang.Short")) {return "string";}
           else if (type.equals("java.util.Date")) {return "string";} //XXX format???
           else if (type.equals("java.lang.Long")) {return "string";} 
        }
    }
       //else the fld is formatted or aggregated
       return "string";
    }*/

    public List<String> getDbFieldInfoTypes() {
        List<String> result = new ArrayList<String>();
        for (DBTableInfo tbl : DBTableIdMgr.getInstance().getTables()) {
            for (DBFieldInfo fld : tbl.getFields()) {
                if (result.indexOf(fld.getType()) == -1) {
                    result.add(fld.getType());
                }
            }
        }
        for (String r : result) {
            System.out.println(r);
        }
        return result;

    }

    private List<String> getFldsXmlForSchema(ExportMappingHelper map, Map<Integer, String> shortNames) {
        List<String> result = new ArrayList<String>();
        for (ExportMappingInfo info : map.getMappings()) {
            String name = shortNames.get(info.getColIdx());
            //String type = getSolrFldTypeForSchema(info.getFldInfo());  why is getSolrFldTypeForSchema different than getSolrFldType?????
            String type = getSolrFldType(info);
            result.add(getFldXmlForSolr(name, type, true, true, false));
        }
        for (int f = 0; f < systemFlds.length; f++) {
            String[] fld = systemFlds[f];
            result.add(f, getFldXmlForSolr(fld[0], fld[1], Boolean.valueOf(fld[2]), Boolean.valueOf(fld[3]),
                    Boolean.valueOf(fld[4])));
        }
        //XXX do we need the xref thing or something like it???
        //result.add(3, getFldXmlForSolr("xref", "string", false, true, true));
        return result;
    }

    protected List<String> getModelInJson(ExportMappingHelper map, Map<Integer, String> shortNames)
            throws IllegalAccessException {
        List<String> result = new ArrayList<String>();
        result.add("[");
        result.add("{\"colname\":\"spid\", \"solrname\":\"spid\", \"solrtype\":\"int\"},");
        for (ExportMappingInfo m : map.getMappings()) {
            WebPortalFieldDef def = new WebPortalFieldDef(m, shortNames.get(m.getColIdx()), getSolrFldType(m));
            result.add(def.toJson() + (result.size() > 0 ? "," : ""));
        }
        result.add("{\"colname\":\"img\", \"solrname\":\"img\", \"solrtype\":\"string\", \"title\":\"image\"}");
        result.add("]");
        return result;
    }

    protected void writeSolrFldXmlToFile(List<String> solrFldXml) throws IOException {
        //        System.out.println("\nFlds:");
        //        for (String fld : solrFldXml)
        //        {
        //           System.out.println(fld);
        //        }
        //        System.out.println();

        List<String> myCopy = new ArrayList<String>(solrFldXml);
        myCopy.add(0, "<!-- solr field definitions for " + mapping.getMappingName() + " web portal -->");
        myCopy.add(1, "<!-- Paste the contents of this file into the solr/conf/schema.xml file. -->");
        File f = new File(writeToDir + File.separator + "SolrFldSchema.xml");
        FileUtils.writeLines(f, "utf8", myCopy);
    }

    protected void writePortalJsonToFile(List<String> portalJson) throws IOException {
        //        System.out.println("\nJson:");
        //        for (String fld : portalJson)
        //        {
        //           System.out.println(fld);
        //        }
        //        System.out.println();

        List<String> myCopy = new ArrayList<String>(portalJson);
        File f = new File(writeToDir + File.separator + "flds.json");
        FileUtils.writeLines(f, "utf8", myCopy);
    }

    protected void writePortalInstanceJsonToFile() throws IOException {
        String portalInstance = UUID.randomUUID().toString();
        File f = new File(writeToDir + File.separator + "PortalInstanceSetting.json");
        JSONObject json = new JSONObject();
        json.accumulate("portalInstance", portalInstance);
        json.accumulate("collectionName", collectionName);
        json.accumulate("imageBaseUrl", StringUtils.replace(attachmentURL, "/web_asset_store.xml", ""));
        FileUtils.writeStringToFile(f, json.toString(2), "utf8");
    }

    /**
     * 
     */
    public boolean index(QBDataSourceListenerIFace progressListener) {

        if (progressListener != null) {
            progressListener.loading();
        }

        long startTime = System.currentTimeMillis();

        IndexWriter[] writers = null;
        long totalRecs = 0;
        List<String> solrFldXml = null;
        List<String> portalFldJson = null;

        try {
            for (int i = 0; i < analyzers.length; i++) {
                files[i] = new File(fileNames[i]);
                analyzers[i] = new StandardAnalyzer(Version.LUCENE_47, CharArraySet.EMPTY_SET);
                FileUtils.deleteDirectory(files[i]);
            }

            System.out.println("Indexing to directory '" + INDEX_DIR + "'...");

            ExportMappingHelper map = new ExportMappingHelper(dbConn, mapping.getId());

            Map<Integer, String> shortNames = getShortNamesForFields(map);

            totalRecs = BasicSQLUtils.getCount(dbConn, "SELECT COUNT(*) FROM " + map.getCacheTblName());
            if (progressListener != null) {
                progressListener.loaded();
                progressListener.rowCount(totalRecs);
            }
            long procRecs = 0;

            Statement stmt = null;
            Statement stmt2 = null;
            Statement stmt3 = null;
            try {
                writers = new IndexWriter[analyzers.length];
                for (int i = 0; i < files.length; i++) {
                    writers[i] = new IndexWriter(FSDirectory.open(files[i]),
                            new IndexWriterConfig(Version.LUCENE_47, analyzers[i]));
                }

                System.out.println("Total Records: " + totalRecs);

                //stmt = dbConn.createStatement(ResultSet.TYPE_FORWARD_ONLY,ResultSet.CONCUR_READ_ONLY);
                stmt = dbConn.createStatement();
                stmt.setFetchSize(Integer.MIN_VALUE);

                stmt2 = dbConn2.createStatement();

                stmt3 = dbConn3.createStatement();
                stmt3.setFetchSize(Integer.MIN_VALUE);

                //pStmt = dbConn3.prepareStatement("SELECT Text1 FROM preparation WHERE CollectionObjectID = ? AND Text1 IS NOT NULL");

                String sql = createQuery(map.getCacheTblName());
                System.out.println(sql);

                ResultSet rs = stmt.executeQuery(sql); //may consume all memory for giant caches
                ResultSetMetaData md = rs.getMetaData();

                StringBuilder indexStr = new StringBuilder();
                StringBuilder contents = new StringBuilder();
                StringBuilder sb = new StringBuilder();
                String lat1 = null, lng1 = null, lat2 = null, lng2 = null;
                solrFldXml = getFldsXmlForSchema(map, shortNames);
                portalFldJson = getModelInJson(map, shortNames);
                while (rs.next()) {
                    Document doc = new Document();
                    indexStr.setLength(0);
                    contents.setLength(0);
                    sb.setLength(0);
                    lat1 = null;
                    lng1 = null;
                    lat2 = null;
                    lng2 = null;
                    for (int c = 1; c <= md.getColumnCount(); c++) {
                        if (includeColumn(c)) {
                            String value = "";
                            try {
                                value = rs.getString(c);
                            } catch (Exception ex) {
                                ex.printStackTrace();
                            }
                            if (c == 1) {
                                //doc.add(new Field("spid", value, Field.Store.YES, Field.Index.ANALYZED));
                                doc.add(new StringField("spid", value, Field.Store.YES));
                            } else {
                                if (value != null) {
                                    ExportMappingInfo info = map.getMappingByColIdx(c - 2);
                                    String fldType = getSolrFldType(info);
                                    if (fldType.equals("string")) {
                                        if (info.isFullTextSearch()) {
                                            doc.add(new TextField(shortNames.get(c - 2), value, Field.Store.YES));
                                        } else {
                                            doc.add(new StringField(shortNames.get(c - 2), value, Field.Store.YES));
                                        }
                                    } else if (fldType.equals("boolean")) {
                                        doc.add(new StringField(shortNames.get(c - 2), value, Field.Store.YES));
                                    } else {
                                        if (fldType.endsWith("int")) {
                                            doc.add(new IntField(shortNames.get(c - 2), rs.getInt(c),
                                                    Field.Store.YES));
                                        } else if (fldType.endsWith("double")) {
                                            doc.add(new DoubleField(shortNames.get(c - 2), rs.getDouble(c),
                                                    Field.Store.YES));
                                        } else if (fldType.endsWith("long")) {
                                            doc.add(new LongField(shortNames.get(c - 2), rs.getLong(c),
                                                    Field.Store.YES));
                                        } else if (fldType.endsWith("float")) {
                                            doc.add(new FloatField(shortNames.get(c - 2), rs.getFloat(c),
                                                    Field.Store.YES));
                                        }
                                    }
                                    contents.append(StringUtils.isNotEmpty(value) ? value : " ");
                                    contents.append('\t');
                                    if ("latitude1"
                                            .equalsIgnoreCase(map.getMappingByColIdx(c - 2).getSpFldName())) {
                                        lat1 = value;
                                    } else if ("latitude2"
                                            .equalsIgnoreCase(map.getMappingByColIdx(c - 2).getSpFldName())) {
                                        lat2 = value;
                                    } else if ("longitude1"
                                            .equalsIgnoreCase(map.getMappingByColIdx(c - 2).getSpFldName())) {
                                        lng1 = value;
                                    } else if ("longitude2"
                                            .equalsIgnoreCase(map.getMappingByColIdx(c - 2).getSpFldName())) {
                                        lng2 = value;
                                    }
                                }
                            }
                        }
                    }
                    indexStr.append(contents);

                    //XXX what, exactly, are the reasons for the store/tokenize settings on these 2?
                    //Ditto for store setting for geoc and img below?
                    doc.add(new TextField("cs", indexStr.toString(), Field.Store.NO));
                    doc.add(new StringField("contents", contents.toString(), Field.Store.YES));

                    if (lat1 != null && lng1 != null) {
                        String geoc = lat1 + " " + lng1;
                        //                       if (lat2 != null && lng2 != null)
                        //                       {
                        //                          geoc += " " + lat2 + " " + lng2;
                        //                       }
                        doc.add(new StringField("geoc", geoc, Field.Store.NO));
                    }

                    String attachments = getAttachments(dbConn2, "collectionobject", rs.getInt(1), false);
                    if (attachments != null && attachments.length() > 0) {
                        doc.add(new StringField("img", attachments, Field.Store.YES));
                    }
                    writers[0].addDocument(doc);

                    //System.out.println(procRecs+" "+rs.getString(1));
                    procRecs++;
                    if (procRecs % 1000 == 0) {
                        System.out.println(procRecs);
                        if (progressListener != null) {
                            progressListener.currentRow(procRecs - 1);
                        }
                    }

                    if (procRecs % 100000 == 0) {
                        System.out.println("Optimizing...");
                        //writers[0].optimize();
                    }
                }
                rs.close();

                writePortalJsonToFile(portalFldJson);
                writeSolrFldXmlToFile(solrFldXml);
                writePortalInstanceJsonToFile();

            } catch (Exception ex) {
                UsageTracker.incrHandledUsageCount();
                edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(this.getClass(), ex);
                return false;
            } finally {

                if (stmt != null) {
                    try {
                        if (stmt != null)
                            stmt.close();
                        if (stmt2 != null)
                            stmt2.close();
                        if (stmt3 != null)
                            stmt3.close();

                    } catch (SQLException e) {
                        UsageTracker.incrHandledUsageCount();
                        edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(this.getClass(), e);
                        return false;
                    }
                }

            }

        } catch (Exception ex) {
            UsageTracker.incrHandledUsageCount();
            edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(this.getClass(), ex);
            return false;

        } finally {
            for (Analyzer a : analyzers) {
                a.close();
            }
            analyzers = null;

            for (IndexWriter writer : writers) {
                try {
                    System.out.println("Optimizing...");
                    //writer.optimize();
                    writer.close();
                    System.out.println("Done Optimizing.");

                } catch (Exception ex) {
                    UsageTracker.incrHandledUsageCount();
                    edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(this.getClass(), ex);
                    return false;

                }
                writer = null;
            }

        }

        buildZipFile();

        if (progressListener != null) {
            progressListener.done(totalRecs);
        }
        long endTime = System.currentTimeMillis();
        System.out.println("Time: " + (endTime - startTime) / 1000);
        return true;

    }

    private void buildZipFile() {
        try {
            ZipOutputStream out = new ZipOutputStream(new FileOutputStream(outputFileName));
            File src = new File(writeToDir);
            addToZip(out, src, "");
            out.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private void addToZip(ZipOutputStream out, File src, String base) throws IOException {
        if (src.isDirectory()) {
            base = base + src.getName() + "/";
            out.putNextEntry(new ZipEntry(base));
            for (File f : src.listFiles()) {
                addToZip(out, f, base);
            }
        } else {
            out.putNextEntry(new ZipEntry(base + src.getName()));
            IOUtils.copy(new FileInputStream(src), out);
        }
    }

    /**
     * 
     */
    /*public void testSearch()
    {
    Statement stmt = null;
        
    String querystr = "(Pengelly) OR (Castilleja AND applegatei)";
    String term     = "contents";
    try
    {
        stmt = dbConn.createStatement(ResultSet.TYPE_FORWARD_ONLY,ResultSet.CONCUR_READ_ONLY);
            
        analyzers = new Analyzer[fileNames.length];
        for (int i=0;i<analyzers.length;i++)
        {
            files[i]     = new File(fileNames[i]);
            analyzers[i] = new StandardAnalyzer(Version.LUCENE_30);
            readers[i]   = IndexReader.open(FSDirectory.open(files[i]), true);
        }
            
        HashMap<Integer, Integer> tblIdHash = new HashMap<Integer, Integer>();
            
        for (int inx=0;inx<analyzers.length;inx++)
        {
            long  startTime   = System.currentTimeMillis();
            Query query       = new QueryParser(Version.LUCENE_30, term, analyzers[inx]).parse(querystr);
            int   hitsPerPage = 10;
            searcher = new IndexSearcher(readers[inx]);
                
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
                
            System.out.println("\n------------- "+fileNames[inx] + " - Found: " + hits.length + " hits.");
                
            for (int i=0;i<hits.length;++i) 
            {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                //System.out.println((i + 1) + ". " + d.get("id") + " -> "+ d.get("xref"));
                    
                tblIdHacssh.clear();
                    
                String pairStr = d.get("xref");
                if (StringUtils.isNotEmpty(pairStr))
                {
                    String [] pairs = StringUtils.split(d.get("xref"), ',');
                    for (String p : pairs)
                    {
                        String [] ids = StringUtils.split(p, '=');
                        tblIdHash.put(Integer.parseInt(ids[0]), Integer.parseInt(ids[1]));
                    }
                }
                    
                if (inx == 0)
                {
                    String id = d.get("id");
                    ResultSet         rs   = stmt.executeQuery("SELECT CatalogNumber FROM collectionobject WHERE CollectionObjectID = "+id);
                    ResultSetMetaData rsmd = rs.getMetaData();
                    while (rs.next())
                    {
                        for (int j=1;j<=rsmd.getColumnCount();j++)
                        {
                            System.out.print(rs.getObject(j) + "\t");
                        }
                        System.out.println();
                    }
                    rs.close();
                        
                    Integer agentId = tblIdHash.get(5);
                    if (agentId != null)
                    {
                        rs   = stmt.executeQuery("SELECT LastName, FirstName, MiddleInitial FROM agent WHERE AgentID = "+agentId);
                        rsmd = rs.getMetaData();
                        while (rs.next())
                        {
                            for (int j=1;j<=rsmd.getColumnCount();j++)
                            {
                                if (rs.getObject(j) != null) System.out.print(rs.getObject(j) + "\t");
                            }
                            System.out.println();
                        }
                        rs.close();
                    }
                } else
                {
                    Integer colObjId = tblIdHash.get(1);
                    if (colObjId != null)
                    {
                        ResultSet         rs   = stmt.executeQuery("SELECT CatalogNumber FROM collectionobject WHERE CollectionObjectID = "+colObjId);
                        ResultSetMetaData rsmd = rs.getMetaData();
                        while (rs.next())
                        {
                            for (int j=1;j<=rsmd.getColumnCount();j++)
                            {
                                System.out.print(rs.getObject(j) + "\t");
                            }
                            System.out.println();
                        }
                        rs.close();
                    }
                }
                    
            }
            System.out.println(String.format("Time: %8.2f", (System.currentTimeMillis() - startTime) / 1000.0));
            searcher.close();
        }
            
            
        for (int i=0;i<analyzers.length;i++)
        {
            readers[i].close();
            analyzers[i].close();
        }
            
    } catch (SQLException e)
    {
        e.printStackTrace();
            
    } catch (IOException e)
    {
        e.printStackTrace();
            
    } catch (ParseException e)
    {
        e.printStackTrace();
    } finally
    {
        if (stmt != null)
        {
            try
            {
                stmt.close();
            } catch (SQLException e)
            {
                e.printStackTrace();
            }
        }
    }
    }*/

    //    public void testSearch()
    //    {
    //        Statement stmt = null;
    //        
    //        String querystr = "23033";//(Pengelly) OR (Castilleja AND applegatei)";
    //        String term     = "1";//"contents"
    //        try
    //        {
    //            //stmt = dbConn.createStatement(ResultSet.TYPE_FORWARD_ONLY,ResultSet.CONCUR_READ_ONLY);
    //            
    //            analyzers = new Analyzer[fileNames.length];
    //            for (int i=0;i<analyzers.length;i++)
    //            {
    //                files[i]     = new File(fileNames[i]);
    //                analyzers[i] = new StandardAnalyzer(Version.LUCENE_30);
    //                readers[i]   = IndexReader.open(FSDirectory.open(files[i]), true);
    //            }
    //            
    //            //HashMap<Integer, Integer> tblIdHash = new HashMap<Integer, Integer>();
    //            
    //            for (int inx=0;inx<analyzers.length;inx++)
    //            {
    //                long  startTime   = System.currentTimeMillis();
    //                QueryParser queryParser = new QueryParser(Version.LUCENE_30, term, analyzers[inx]);
    //                Query query = queryParser.parse(querystr);
    //                
    //                int   hitsPerPage = 10;
    //                searcher = new IndexSearcher(readers[inx]);
    //                
    //                TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    //                searcher.search(query, collector);
    //                ScoreDoc[] hits = collector.topDocs().scoreDocs;
    //                
    //                System.out.println("\n------------- "+fileNames[inx] + " - Found: " + hits.length + " hits.");
    //                
    //                for (int i=0;i<hits.length;++i) 
    //                {
    //                    int docId = hits[i].doc;
    //                    Document d = searcher.doc(docId);
    //                    System.out.println((i + 1) + ". " + d.get("1"));
    //                    
    //                    //tblIdHacssh.clear();
    //                }
    //                    
    //                System.out.println(String.format("Time: %8.2f", (System.currentTimeMillis() - startTime) / 1000.0));
    //                searcher.close();
    //            }
    //            
    //            
    //            for (int i=0;i<analyzers.length;i++)
    //            {
    //                readers[i].close();
    //                analyzers[i].close();
    //            }
    //            
    //        } catch (IOException e)
    //        {
    //            e.printStackTrace();
    //            
    //        } catch (ParseException e)
    //        {
    //            e.printStackTrace();
    //        } finally
    //        {
    //            if (stmt != null)
    //            {
    //                try
    //                {
    //                    stmt.close();
    //                } catch (SQLException e)
    //                {
    //                    e.printStackTrace();
    //                }
    //            }
    //        }
    //    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        System.setProperty(SchemaI18NService.factoryName, "edu.ku.brc.specify.config.SpecifySchemaI18NService"); // Needed for Localization and Schema //$NON-NLS-1$
        System.setProperty(DBTableIdMgr.factoryName, "edu.ku.brc.specify.config.SpecifyDBTableIdMgr"); // Needed for Tree Field Names //$NON-NLS-1$

        try {
            //BuildSearchIndex2 bsi = new BuildSearchIndex2("asdfasdf");
            //bsi.createDBConnection("localhost", "3306", "kuherps", "root", "root");
            //BuildSearchIndex2 bsi = new BuildSearchIndex2("dwcterms10_28_11_2");
            //bsi.createDBConnection("localhost", "3306", "kuento", "root", "root");

            //BuildSearchIndex2 bsi = new BuildSearchIndex2(7, "index-specify-kuento");
            //bsi.createDBConnection("localhost", "3306", "bigento", "root", "root");

            //bsi.index(null);
            //bsi.getDbFieldInfoTypes();
            //bsi.testSearch();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}