Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

This page presents example usage of weka.core Instances numAttributes.

Prototype


public int numAttributes()

Document

Returns the number of attributes.
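
A minimal sketch of a typical call (the file name data.arff is a placeholder, not taken from the examples below): numAttributes() usually bounds loops over the attributes, and numAttributes() - 1 is the common idiom for setting the class index to the last column.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;

public class NumAttributesSketch {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path.
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));

        // Common idiom, also used in the examples below: the last attribute is the class.
        data.setClassIndex(data.numAttributes() - 1);

        // numAttributes() bounds the loop over all attributes.
        for (int i = 0; i < data.numAttributes(); i++) {
            System.out.println(i + ": " + data.attribute(i).name());
        }
    }
}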

Usage

From source file:com.rapidminer.tools.WekaTools.java

License:Open Source License

/**
 * Creates a RapidMiner example set from Weka instances. Only a label can be used
 * as a special attribute; other types of special attributes are not
 * supported. If <code>attributeNamePrefix</code> is not null, the given
 * string prefix plus a number is used as the attribute name.
 */
public static ExampleSet toRapidMinerExampleSet(Instances instances, String attributeNamePrefix,
        int datamanagement) {
    int classIndex = instances.classIndex();

    // create example table

    // 1. Extract attributes
    List<Attribute> attributes = new ArrayList<Attribute>();
    int number = 1; // use for attribute names
    for (int i = 0; i < instances.numAttributes(); i++) {
        weka.core.Attribute wekaAttribute = instances.attribute(i);
        int rapidMinerAttributeValueType = Ontology.REAL;
        if (wekaAttribute.isNominal())
            rapidMinerAttributeValueType = Ontology.NOMINAL;
        else if (wekaAttribute.isString())
            rapidMinerAttributeValueType = Ontology.STRING;
        Attribute attribute = AttributeFactory.createAttribute(wekaAttribute.name(),
                rapidMinerAttributeValueType);
        if ((i != classIndex) && (attributeNamePrefix != null) && (attributeNamePrefix.length() > 0)) {
            attribute.setName(attributeNamePrefix + "_" + (number++));
        }
        if (wekaAttribute.isNominal()) {
            for (int a = 0; a < wekaAttribute.numValues(); a++) {
                String nominalValue = wekaAttribute.value(a);
                attribute.getMapping().mapString(nominalValue);
            }
        }
        attributes.add(attribute);
    }

    Attribute label = null;
    if (classIndex >= 0) {
        label = attributes.get(classIndex);
        label.setName("label");
    }

    // 2. Guarantee alphabetical mapping to numbers
    for (int j = 0; j < attributes.size(); j++) {
        Attribute attribute = attributes.get(j);
        if (attribute.isNominal())
            attribute.getMapping().sortMappings();
    }

    // 3. Read data
    MemoryExampleTable table = new MemoryExampleTable(attributes);
    DataRowFactory factory = new DataRowFactory(datamanagement, '.');
    // create data
    List<DataRow> dataList = new LinkedList<DataRow>();
    int numberOfRapidMinerAttributes = instances.numAttributes();
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        DataRow dataRow = factory.create(numberOfRapidMinerAttributes);
        for (int a = 0; a < instances.numAttributes(); a++) {
            Attribute attribute = table.getAttribute(a);
            double wekaValue = instance.value(a);
            if (attribute.isNominal()) {
                String nominalValue = instances.attribute(a).value((int) wekaValue);
                dataRow.set(attribute, attribute.getMapping().mapString(nominalValue));
            } else {
                dataRow.set(attribute, wekaValue);
            }
        }
        dataRow.trim();
        dataList.add(dataRow);
    }

    // read the data rows; the label is handled separately
    table.readExamples(new ListDataRowReader(dataList.iterator()));

    // create and return example set
    return table.createExampleSet(label);
}

From source file:com.reactivetechnologies.analytics.core.eval.AdaBoostM1WithBuiltClassifiers.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    /** Changed here: Using the provided classifiers */
    /** End */

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();
    if ((!m_UseResampling) && (m_Classifier instanceof WeightedInstancesHandler)) {
        buildClassifierWithWeights(data);
    } else {
        buildClassifierUsingResampling(data);
    }
}

From source file:com.reactivetechnologies.analytics.mapper.ARFFDataMapper.java

License:Open Source License

@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
    if (!(request instanceof ArffJsonRequest)) {
        throw new ParseException("Not an instance of " + ArffJsonRequest.class, -1);
    }
    try {
        ArffJsonRequest arff = (ArffJsonRequest) request;
        ArffReader ar = new ArffReader(new StringReader(request.toString()));
        Instances ins = ar.getData();
        ins.setClassIndex(arff.getClassIndex() >= 0 ? arff.getClassIndex() : ins.numAttributes() - 1);
        return new Dataset(ins);
    } catch (Exception e) {
        ParseException pe = new ParseException("Cannot convert JSON stream to ARFF", -1);
        pe.initCause(e);
        throw pe;
    }
}

From source file:com.reactivetechnologies.platform.analytics.mapper.JSONDataMapper.java

License:Open Source License

@Override
public TrainModel mapStringToModel(ArffJsonRequest request) throws ParseException {
    try {
        ArffReader ar = new ArffReader(new StringReader(request.toString()));
        Instances ins = ar.getData();
        ins.setClassIndex(request.getClassIndex() >= 0 ? request.getClassIndex() : ins.numAttributes() - 1);
        return new TrainModel(ins);
    } catch (Exception e) {
        ParseException pe = new ParseException("Cannot convert JSON stream to ARFF", -1);
        pe.initCause(e);
        throw pe;
    }
}

From source file:com.relationalcloud.main.Explanation.java

License:Open Source License

/**
 * @param args
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");

    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");

    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);

    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {

        // CREATE A DB CONNECTION
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }
            // INSTANTIATE THE CLASSIFIER
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;
            // ATTRIBUTE FILTERING
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();

                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();

                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    if (classifier.measureNumLeaves() == 1) {

                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }

                    }

                    // POPULATING THE justifiedpartition column with the result of this
                    // classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }

                } else { // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");

        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }

        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.main.ExplanationSingleAttribute.java

License:Open Source License

/**
 * @param args
 */
@Deprecated
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // loading properties from file
    String schemaname = ini.getProperty("schemaname");

    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }

    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register jdbcDriver
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        Statement stmt = conn.createStatement();

        // NOTE: the parameter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set.

        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");

        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";

                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();

                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE:  "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java

License:Open Source License

/**
 * @param args
 */
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST
    // VARIOUS PARSER FUNCTIONALITIES
    System.out.println("Loading and processing TPCC traces...");

    Connection conn;
    try {

        String schemaname = ini.getProperty("schema");
        String connection = ini.getProperty("conn");
        String user = ini.getProperty("user");
        String password = ini.getProperty("password");
        conn = DriverManager.getConnection(connection + schemaname, user, password);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        conn.setAutoCommit(true);

        Statement stmt = conn.createStatement();

        String txnLogTable = ini.getProperty("txnLogTable");
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`";
        ResultSet res = stmt.executeQuery(sqlstring);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            // System.out.println("SQL: " +sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        String accessLogTable = ini.getProperty("accessLogTable");

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");
        for (String str : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str
                        + " s WHERE tableid = \"" + str + "\" AND s.id = g.id";
                System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    a0.add(new Double(res.getObject(1).hashCode()));
                    a1.add(new Double(res.getObject(2).hashCode()));
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    tree.buildClassifier(newData); // build classifier

                                }
                                System.out.println("CLASSIFICATION CONFIDENCE:  " + tree.getConfidenceFactor()
                                        + "\n " + tree.toString());

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Repeats the selection from the database removing duplicates, since they
 * would only increase the execution time, and runs the tuples through the
 * classifier to populate the justifiedpartition column.
 *
 * @param tableProcessed
 * @param classifier
 * @param attributes
 * @throws SQLException
 * @throws Exception
 */
public void populateJustifiedColumn(String tableProcessed, Classifier classifier, ArrayList<String> attributes,
        Connection conn, int numbPart, Enumeration enumclassvalues) throws SQLException, Exception {
    if (true) { // short-circuit: delegate to labelTest and skip the code below
        labelTest(tableProcessed, classifier, conn);
        return;
    }

    tableProcessed = removeQuotes(tableProcessed);

    // get from the DB the tuples content and their partitioning column
    String sqlstring = "SELECT distinct g.tupleid, ";
    for (String sc : attributes) {
        sqlstring += "s." + sc + ", ";
    }
    sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable
            + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s "
            + "WHERE s.relcloud_id = g.tupleid;";

    System.out.println(sqlstring);
    Statement stmt = conn.createStatement();

    // initialize the testing table with a hash-partition-like distribution
    // to avoid complaints from the classifier
    if (!testingtable.equals(sampledtrainingtable)) {
        int i = 0;

        Object o = enumclassvalues.nextElement();

        // set everything to an existing value to ensure that every field is
        // covered
        stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '"
                + tableProcessed + "'");
        // and then sprinkle in a bunch of other values (unsure whether it is
        // required);
        while (enumclassvalues.hasMoreElements()) {
            o = enumclassvalues.nextElement();

            // FIXME there might still be an issue in which tupleid%i does not exist,
            // and thus one of the "o" never appears in the instance...
            stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%"
                    + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'");
            i++;
        }
    }

    ResultSet res = stmt.executeQuery(sqlstring);
    // create an instance from the resultset
    Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
    res.close();

    data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1);
    Instances data_no_tupleid = makeLastNominal(data_tupleid);
    data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1);
    // remove tupleid from data_no_tupleid, still available in data_tupleid
    data_no_tupleid.deleteAttributeAt(0);

    // if(data_no_tupleid.classAttribute().numValues()>1){
    System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol);

    // use data that still has the tupleid and newData for the classification
    Enumeration enum_data_tupleid = data_tupleid.enumerateInstances();
    Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances();

    PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `"
            + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed + "' AND tupleid = ?;");

    while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) {

        Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement();
        Instance instance = (Instance) enum_data_no_tupleid.nextElement();

        double part = classifier.classifyInstance(instance);
        if (Double.isNaN(part)) // a missing classification is NaN; comparing with == would always be false
            System.err.println("No classification for:" + instance.toString());
        updateJustCol.setInt(1, (int) part);
        updateJustCol.setInt(2, (int) tupIDinstance.value(0));

        // System.out.println(tableProcessed+" "+ instance.value(0) + " " +
        // tupIDinstance.classValue() +" "+ part);

        updateJustCol.execute();
        updateJustCol.clearParameters();

    }

    updateJustCol.close();

}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Invokes a filter to transform the last attribute into a nominal one.
 *
 * @param data
 * @return
 * @throws Exception
 */
public static Instances makeLastNominal(Instances data) throws Exception {
    Instances newData;

    if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
        NumericToNominal ntn = new NumericToNominal();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "last"; // first attribute
        ntn.setOptions(options); // set options
        ntn.setInputFormat(data); // inform filter about dataset
        // **AFTER** setting options
        newData = Filter.useFilter(data, ntn); // apply filter

    } else {
        StringToNominal ntn = new StringToNominal();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "last"; // first attribute
        ntn.setOptions(options); // set options
        ntn.setInputFormat(data); // inform filter about dataset
        // **AFTER** setting options
        newData = Filter.useFilter(data, ntn); // apply filter

    }

    return newData;
}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Fetches the table content and the partition labels from the database, and
 * prepares a Weka Instances object by sampling and cleaning it.
 *
 * @param tabname
 * @param arraySc
 * @param conn
 * @return
 */
public Instances generateInstancesForTable(String tabname, ArrayList<SimpleCount> arraySc, Connection conn) {

    tabname = removeQuotes(tabname);

    Statement stmt;
    try {
        stmt = conn.createStatement();

        ResultSet test = stmt
                .executeQuery("SELECT count(*) FROM " + sampledtrainingtable + " WHERE " + pcol + " is null");

        // safety check: verify that there are no nulls in the input table.
        if (test.next() && test.getInt(1) > 0)
            throw new Exception("Table " + sampledtrainingtable + " contains nulls in " + pcol);

        // get from the DB the tuples content and their partitioning column
        String sqlstring = "SELECT ";
        for (SimpleCount sc : arraySc) {
            sqlstring += "s." + sc.colname + ", ";
        }

        sqlstring += "g." + pcol + " FROM " + "(SELECT tupleid," + pcol + " FROM `" + sampledtrainingtable
                + "` WHERE tableid = '" + tabname + "') AS g, relcloud_" + tabname + " AS s "
                + "WHERE s.relcloud_id = g.tupleid";

        System.out.println(sqlstring);

        ResultSet res = stmt.executeQuery(sqlstring);

        // create an instance from the resultset
        Instances data = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
        res.close();

        // prepare the data, by setting class attributed and sampling if required
        data = makeLastNominal(data);
        data.setClassIndex(data.numAttributes() - 1);
        data = sampleTraining(Double.parseDouble(ini.getProperty("Explanation.j48SamplingThreshold")), data);

        System.out.println(data.toSummaryString());

        return data;

    } catch (SQLException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    return null;
}