milk.experiment.MIInstanceQuery.java Source code

Java tutorial

Introduction

Here is the source code for milk.experiment.MIInstanceQuery.java

Source

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    InstanceQuery.java
 *    Copyright (C) 1999 University of Waikato
 *
 */

package milk.experiment;

import java.io.*;
import java.math.*;
import java.net.InetAddress;
import java.sql.*;
import java.util.*;
import java.util.Date;
import weka.core.*;

/**
 * Convert the results of a database query into instances. The jdbc
 * driver and database to be used default to "jdbc.idbDriver" and
 * "jdbc:idb=experiments.prp". These may be changed by creating
 * a java properties file called DatabaseUtils.props in user.home or
 * the current directory. eg:<p>
 *
 * <code><pre>
 * jdbcDriver=jdbc.idbDriver
 * jdbcURL=jdbc:idb=experiments.prp
 * </pre></code><p>
 *
 * Command line use just outputs the instances to System.out.
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.14 $
 */
public class MIInstanceQuery extends MIDatabaseUtils implements OptionHandler {

    /** Determines whether sparse data is created */
    boolean m_CreateSparseData = false;

    /** Query to execute */
    String m_Query = "SELECT * from ?";

    /**
     * Sets up the database drivers
     *
     * @exception Exception if an error occurs
     */
    public MIInstanceQuery() throws Exception {

        super();
    }

    /**
     * Returns an enumeration describing the available options <p>
     *
     */
    public Enumeration listOptions() {
        Vector newVector = new Vector(2);

        newVector.addElement(new Option("\tSQL query to execute.", "Q", 1, "-Q <query>"));
        newVector.addElement(new Option("\tReturn sparse rather than normal " + "instances.", "S", 0, "-S"));
        return newVector.elements();
    }

    /**
     * Parses a given list of options.
     *
     * Valid options are:<p>
     * 
     * -S <br>
     * Return a set of sparse instances rather than normal instances.<p>
     *
     * @param options the list of options as an array of strings
     * @exception Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {
        setSparseData(Utils.getFlag('S', options));

        String optionString;

        optionString = Utils.getOption('Q', options);
        if (optionString.length() != 0) {
            setQuery(optionString);
        }
    }

    /**
     * Returns the tip text for this property
     * @return tip text for this property suitable for
     * displaying in the explorer/experimenter gui
     */
    public String queryTipText() {
        return "The SQL query to execute against the database.";
    }

    /**
     * Set the query to execute against the database
     * @param q the query to execute
     */
    public void setQuery(String q) {
        m_Query = q;
    }

    /**
     * Get the query to execute against the database
     * @return the query
     */
    public String getQuery() {
        return m_Query;
    }

    /**
     * Returns the tip text for this property
     * @return tip text for this property suitable for
     * displaying in the explorer/experimenter gui
     */
    public String sparseDataTipText() {
        return "Encode data as sparse instances.";
    }

    /**
     * Sets whether data should be encoded as sparse instances
     * @param s true if data should be encoded as a set of sparse instances
     */
    public void setSparseData(boolean s) {
        m_CreateSparseData = s;
    }

    /**
     * Gets whether data is to be returned as a set of sparse instances
     * @return true if data is to be encoded as sparse instances
     */
    public boolean getSparseData() {
        return m_CreateSparseData;
    }

    /**
     * Gets the current settings of InstanceQuery
     *
     * @return an array of strings suitable for passing to setOptions()
     */
    public String[] getOptions() {

        String[] options = new String[3];
        int current = 0;

        options[current] = "-Q";
        options[current++] = getQuery();

        if (getSparseData()) {
            options[current++] = "-S";
        }

        while (current < options.length) {
            options[current++] = "";
        }

        return options;
    }

    /**
     * Makes a database query using the query set through the -Q option 
     * to convert a table into a set of instances
     *
     * @return the instances contained in the result of the query
     * @exception Exception if an error occurs
     */
    public Instances retrieveInstances() throws Exception {
        return retrieveInstances(m_Query);
    }

    /**
     * Makes a database query to convert a table into a set of instances
     *
     * @param query the query to convert to instances
     * @return the instances contained in the result of the query
     * @exception Exception if an error occurs
     */
    public Instances retrieveInstances(String query) throws Exception {

        System.err.println("Executing query: " + query);
        connectToDatabase();
        if (execute(query) == false) {
            throw new Exception("Query didn't produce results");
        }
        ResultSet rs = getResultSet();
        System.err.println("Getting metadata...");
        ResultSetMetaData md = rs.getMetaData();

        // Determine structure of the instances
        int numAttributes = md.getColumnCount();
        int[] attributeTypes = new int[numAttributes];
        Hashtable[] nominalIndexes = new Hashtable[numAttributes];
        FastVector[] nominalStrings = new FastVector[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
            switch (md.getColumnType(i)) {
            case Types.CHAR:
            case Types.VARCHAR:
            case Types.LONGVARCHAR:
            case Types.BINARY:
            case Types.VARBINARY:
            case Types.LONGVARBINARY:
                //System.err.println("String --> nominal");
                attributeTypes[i - 1] = Attribute.NOMINAL;
                nominalIndexes[i - 1] = new Hashtable();
                nominalStrings[i - 1] = new FastVector();
                break;
            case Types.BIT:
                ////System.err.println("boolean --> nominal");
                attributeTypes[i - 1] = Attribute.NOMINAL;
                nominalIndexes[i - 1] = new Hashtable();
                nominalIndexes[i - 1].put("false", new Double(0));
                nominalIndexes[i - 1].put("true", new Double(1));
                nominalStrings[i - 1] = new FastVector();
                nominalStrings[i - 1].addElement("false");
                nominalStrings[i - 1].addElement("true");
                break;
            case Types.NUMERIC:
            case Types.DECIMAL:
                //System.err.println("BigDecimal --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.TINYINT:
                //System.err.println("byte --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.SMALLINT:
                //System.err.println("short --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.INTEGER:
                //System.err.println("int --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.BIGINT:
                //System.err.println("long --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.REAL:
                //System.err.println("float --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.FLOAT:
            case Types.DOUBLE:
                //System.err.println("double --> numeric");
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            /*case Types.BINARY:
               case Types.VARBINARY:
               case Types.LONGVARBINARY:
            //System.err.println("byte[] --> unsupported");
            attributeTypes[i - 1] = Attribute.STRING;
            break; */
            case Types.DATE:
            case Types.TIME:
            case Types.TIMESTAMP:
                attributeTypes[i - 1] = Attribute.DATE;
                break;
            default:
                //System.err.println("Unknown column type");
                attributeTypes[i - 1] = Attribute.STRING;
            }
        }

        // Step through the tuples
        System.err.println("Creating instances...");
        FastVector instances = new FastVector();
        int rowCount = 0;
        while (rs.next()) {
            if (rowCount % 100 == 0) {
                System.err.print("read " + rowCount + " instances \r");
                System.err.flush();
            }
            double[] vals = new double[numAttributes];
            for (int i = 1; i <= numAttributes; i++) {
                switch (md.getColumnType(i)) {
                case Types.CHAR:
                case Types.VARCHAR:
                case Types.LONGVARCHAR:
                case Types.BINARY:
                case Types.VARBINARY:
                case Types.LONGVARBINARY:
                    String str = rs.getString(i);

                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        Double index = (Double) nominalIndexes[i - 1].get(str);
                        if (index == null) {
                            index = new Double(nominalStrings[i - 1].size());
                            nominalIndexes[i - 1].put(str, index);
                            nominalStrings[i - 1].addElement(str);
                        }
                        vals[i - 1] = index.doubleValue();
                    }
                    break;
                case Types.BIT:
                    boolean boo = rs.getBoolean(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (boo ? 1.0 : 0.0);
                    }
                    break;
                case Types.NUMERIC:
                case Types.DECIMAL:
                    //     BigDecimal bd = rs.getBigDecimal(i, 4); 
                    double dd = rs.getDouble(i);
                    // Use the column precision instead of 4?
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        //       newInst.setValue(i - 1, bd.doubleValue());
                        vals[i - 1] = dd;
                    }
                    break;
                case Types.TINYINT:
                    byte by = rs.getByte(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) by;
                    }
                    break;
                case Types.SMALLINT:
                    short sh = rs.getByte(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) sh;
                    }
                    break;
                case Types.INTEGER:
                    int in = rs.getInt(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) in;
                    }
                    break;
                case Types.BIGINT:
                    long lo = rs.getLong(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) lo;
                    }
                    break;
                case Types.REAL:
                    float fl = rs.getFloat(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) fl;
                    }
                    break;
                case Types.FLOAT:
                case Types.DOUBLE:
                    double dou = rs.getDouble(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) dou;
                    }
                    break;
                /*case Types.BINARY:
                case Types.VARBINARY:
                case Types.LONGVARBINARY: */
                case Types.DATE:
                case Types.TIME:
                case Types.TIMESTAMP:
                    Date date = rs.getDate(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        // TODO: Do a value check here.
                        vals[i - 1] = (double) date.getTime();
                    }
                    break;
                default:
                    vals[i - 1] = Instance.missingValue();
                }
            }
            Instance newInst;
            if (m_CreateSparseData) {
                newInst = new SparseInstance(1.0, vals);
            } else {
                newInst = new Instance(1.0, vals);
            }
            instances.addElement(newInst);
            rowCount++;
        }
        //disconnectFromDatabase();  (perhaps other queries might be made)

        // Create the header and add the instances to the dataset
        System.err.println("Creating header...");
        FastVector attribInfo = new FastVector();
        for (int i = 0; i < numAttributes; i++) {
            String attribName = md.getColumnName(i + 1);
            switch (attributeTypes[i]) {
            case Attribute.NOMINAL:
                attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
                break;
            case Attribute.NUMERIC:
                attribInfo.addElement(new Attribute(attribName));
                break;
            case Attribute.STRING:
                attribInfo.addElement(new Attribute(attribName, (FastVector) null));
                break;
            case Attribute.DATE:
                attribInfo.addElement(new Attribute(attribName, (String) null));
                break;
            default:
                throw new Exception("Unknown attribute type");
            }
        }
        Instances result = new Instances("QueryResult", attribInfo, instances.size());
        for (int i = 0; i < instances.size(); i++) {
            result.add((Instance) instances.elementAt(i));
        }
        rs.close();
        return result;
    }

    /**
     * Test the class from the command line. The instance
     * query should be specified with -Q sql_query
     *
     * @param args contains options for the instance query
     */
    public static void main(String args[]) {

        try {
            MIInstanceQuery iq = new MIInstanceQuery();
            String query = Utils.getOption('Q', args);
            if (query.length() == 0) {
                iq.setQuery("select * from Experiment_index");
            } else {
                iq.setQuery(query);
            }
            iq.setOptions(args);
            try {
                Utils.checkForRemainingOptions(args);
            } catch (Exception e) {
                System.err.println("Options for weka.experiment.InstanceQuery:\n");
                Enumeration en = iq.listOptions();
                while (en.hasMoreElements()) {
                    Option o = (Option) en.nextElement();
                    System.err.println(o.synopsis() + "\n" + o.description());
                }
                System.exit(1);
            }

            Instances aha = iq.retrieveInstances();
            iq.disconnectFromDatabase();
            // The dataset may be large, so to make things easier we'll
            // output an instance at a time (rather than having to convert
            // the entire dataset to one large string)
            System.out.println(new Instances(aha, 0));
            for (int i = 0; i < aha.numInstances(); i++) {
                System.out.println(aha.instance(i));
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println(e.getMessage());
        }
    }
}