Example usage for weka.core Instances toSummaryString

List of usage examples for weka.core Instances toSummaryString

Introduction

This page collects usage examples for the weka.core.Instances method toSummaryString().

Prototype

public String toSummaryString() 

Document

Generates a string summarizing the set of instances.
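
For orientation, here is a minimal self-contained sketch (the relation and attribute names are made up for illustration) that builds a tiny in-memory dataset and prints its summary:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class SummaryStringDemo {
    public static void main(String[] args) {
        // Declare two numeric attributes and create an empty dataset.
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        Instances data = new Instances("demo", attrs, 0);

        // Add a single row with weight 1.0.
        data.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));

        // Prints the relation name, the number of instances and, per
        // attribute, statistics such as missing/unique/distinct counts.
        System.out.println(data.toSummaryString());
    }
}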

Usage

From source file:adams.flow.transformer.WekaInstancesInfo.java

License:Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int labelIndex;
    double[] dist;
    Enumeration enm;
    int i;

    result = null;

    if (m_InputToken.getPayload() instanceof Instance)
        inst = ((Instance) m_InputToken.getPayload()).dataset();
    else
        inst = (Instances) m_InputToken.getPayload();
    m_AttributeIndex.setData(inst);
    index = m_AttributeIndex.getIntIndex();

    m_Queue.clear();

    switch (m_Type) {
    case FULL:
        m_Queue.add(inst.toSummaryString());
        break;

    case FULL_ATTRIBUTE:
        m_Queue.add(getAttributeStats(inst, index));
        break;

    case FULL_CLASS:
        if (inst.classIndex() > -1)
            m_Queue.add(getAttributeStats(inst, inst.classIndex()));
        break;

    case HEADER:
        m_Queue.add(new Instances(inst, 0).toString());
        break;

    case RELATION_NAME:
        m_Queue.add(inst.relationName());
        break;

    case ATTRIBUTE_NAME:
        if (index != -1)
            m_Queue.add(inst.attribute(index).name());
        break;

    case ATTRIBUTE_NAMES:
        for (i = 0; i < inst.numAttributes(); i++)
            m_Queue.add(inst.attribute(i).name());
        break;

    case LABELS:
        if (index != -1) {
            enm = inst.attribute(index).enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case CLASS_LABELS:
        if (inst.classIndex() > -1) {
            enm = inst.classAttribute().enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case LABEL_COUNT:
        if (index > -1) {
            m_LabelIndex.setData(inst.attribute(index));
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
        }
        break;

    case LABEL_COUNTS:
        if (index > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
        break;

    case LABEL_DISTRIBUTION:
        if (index > -1) {
            dist = new double[inst.attributeStats(index).nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = inst.attributeStats(index).nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case CLASS_LABEL_COUNT:
        if (inst.classIndex() > -1) {
            m_LabelIndex.setData(inst.classAttribute());
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
        }
        break;

    case CLASS_LABEL_COUNTS:
        if (inst.classIndex() > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
        break;

    case CLASS_LABEL_DISTRIBUTION:
        if (inst.classIndex() > -1) {
            dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case NUM_ATTRIBUTES:
        m_Queue.add(inst.numAttributes());
        break;

    case NUM_INSTANCES:
        m_Queue.add(inst.numInstances());
        break;

    case NUM_CLASS_LABELS:
        if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
            m_Queue.add(inst.classAttribute().numValues());
        break;

    case NUM_LABELS:
        if ((index != -1) && inst.attribute(index).isNominal())
            m_Queue.add(inst.attribute(index).numValues());
        break;

    case NUM_DISTINCT_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).distinctCount);
        break;

    case NUM_UNIQUE_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).uniqueCount);
        break;

    case NUM_MISSING_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).missingCount);
        break;

    case MIN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.min);
        break;

    case MAX:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.max);
        break;

    case MEAN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.mean);
        break;

    case STDEV:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
        break;

    case ATTRIBUTE_TYPE:
        if (index != -1)
            m_Queue.add(Attribute.typeToString(inst.attribute(index)));
        break;

    case CLASS_TYPE:
        if (inst.classIndex() != -1)
            m_Queue.add(Attribute.typeToString(inst.classAttribute()));
        break;

    default:
        result = "Unhandled info type: " + m_Type;
    }

    return result;
}
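
One detail worth noting in the HEADER case: new Instances(inst, 0) is Weka's copy constructor with an initial capacity of 0, so the copy has the same structure as inst but no rows, and its toString() therefore prints just the dataset header.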

From source file:com.relationalcloud.main.ExplanationSingleAttribute.java

License:Open Source License

/**
 * @param args
 */
@Deprecated
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // loading properties from file
    String schemaname = ini.getProperty("schemaname");

    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }

    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register jdbcDriver
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        Statement stmt = conn.createStatement();

        // NOTE: the parameter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set.

        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");

        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";

                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();

                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE:  "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}
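
The Weka pattern at the heart of this example recurs in the files below, so here it is as a minimal standalone sketch (the ARFF path is hypothetical; the rest mirrors the code above): nominalize the class attribute, test the classifier's capabilities, and fall back to toSummaryString() to explain why a dataset was rejected.

import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;

public class CapabilityCheckSketch {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("/tmp/trace.arff"); // hypothetical path
        data.setClassIndex(data.numAttributes() - 1);

        // Convert the numeric class attribute to nominal; note that
        // setInputFormat must be called AFTER setOptions. "last" is
        // equivalent to the "2" used above when the class is the second
        // of two attributes.
        NumericToNominal ntn = new NumericToNominal();
        ntn.setOptions(new String[] { "-R", "last" });
        ntn.setInputFormat(data);
        Instances newData = Filter.useFilter(data, ntn);

        J48 tree = new J48();
        if (!tree.getCapabilities().test(newData)) {
            // The per-attribute summary usually shows what the classifier objects to.
            System.err.println("Cannot process:\n" + newData.toSummaryString());
        } else {
            tree.buildClassifier(newData);
            System.out.println(tree);
        }
    }
}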

From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java

License:Open Source License

/**
 * @param args
 */
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST
    // VARIOUS PARSER FUNCTIONALITIES
    System.out.println("Loading and processing TPCC traces...");

    Connection conn;
    try {

        String schemaname = ini.getProperty("schema");
        String connection = ini.getProperty("conn");
        String user = ini.getProperty("user");
        String password = ini.getProperty("password");
        conn = DriverManager.getConnection(connection + schemaname, user, password);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        conn.setAutoCommit(true);

        Statement stmt = conn.createStatement();

        String txnLogTable = ini.getProperty("txnLogTable");
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`";
        ResultSet res = stmt.executeQuery(sqlstring);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            // System.out.println("SQL: " +sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        String accessLogTable = ini.getProperty("accessLogTable");

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");
        for (String str : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str
                        + " s WHERE tableid = \"" + str + "\" AND s.id = g.id";
                System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    a0.add(new Double(res.getObject(1).hashCode()));
                    a1.add(new Double(res.getObject(2).hashCode()));
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    tree.buildClassifier(newData); // build classifier
                                    System.out.println("CLASSIFICATION CONFIDENCE:  " + tree.getConfidenceFactor()
                                            + "\n " + tree.toString());
                                }

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Trains the given classifier.
 * 
 * @param newData
 * @param classifier
 * @throws Exception
 */
public static void trainClassifier(Instances newData, Classifier classifier) throws Exception {
    // if the class attribute is not unary we proceed regularly

    // verify the Classifier can handle this dataset
    classifier.getCapabilities().testWithFail(newData);

    System.out.println("BUILDING CLASSIFIER ON INSTANCE:" + newData.toSummaryString());

    long treeTstart = System.currentTimeMillis();
    classifier.buildClassifier(newData); // build classifier
    long treeTend = System.currentTimeMillis();
    System.out.println("CLASSIFIER BUILDING TIME: " + (treeTend - treeTstart) + "ms FROM: "
            + newData.numInstances() + " instances \n" + classifier.toString());

}
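
A hypothetical invocation (handler, tabname, arraySc and conn are assumed to exist; the preparation call mirrors generateInstancesForTable below):

Instances newData = handler.generateInstancesForTable(tabname, arraySc, conn);
if (newData != null)
    trainClassifier(newData, new J48()); // logs the summary, then timing and model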

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Fetches from the database the content of the table and the partition labels,
 * and prepares a Weka Instances object by sampling and cleaning it.
 * 
 * @param tabname
 * @param arraySc
 * @param conn
 * @return
 */
public Instances generateInstancesForTable(String tabname, ArrayList<SimpleCount> arraySc, Connection conn) {

    tabname = removeQuotes(tabname);

    Statement stmt;
    try {
        stmt = conn.createStatement();

        ResultSet test = stmt
                .executeQuery("SELECT count(*) FROM " + sampledtrainingtable + " WHERE " + pcol + " is null");

        // safety check: verify that there are no nulls in the input table.
        if (test.next() && test.getInt(1) > 0)
            throw new Exception("Table " + sampledtrainingtable + " contains nulls in " + pcol);

        // get from the DB the tuples content and their partitioning column
        String sqlstring = "SELECT ";
        for (SimpleCount sc : arraySc) {
            sqlstring += "s." + sc.colname + ", ";
        }

        sqlstring += "g." + pcol + " FROM " + "(SELECT tupleid," + pcol + " FROM `" + sampledtrainingtable
                + "` WHERE tableid = '" + tabname + "') AS g, relcloud_" + tabname + " AS s "
                + "WHERE s.relcloud_id = g.tupleid";

        System.out.println(sqlstring);

        ResultSet res = stmt.executeQuery(sqlstring);

        // create an instance from the resultset
        Instances data = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
        res.close();

        // prepare the data by setting the class attribute and sampling if required
        data = makeLastNominal(data);
        data.setClassIndex(data.numAttributes() - 1);
        data = sampleTraining(Double.parseDouble(ini.getProperty("Explanation.j48SamplingThreshold")), data);

        System.out.println(data.toSummaryString());

        return data;

    } catch (SQLException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    return null;
}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

public Instances generateInstancesForTableDemo(String tabname, ArrayList<SimpleCount> arraySc,
        Connection conn) {

    tabname = removeQuotes(tabname);

    Statement stmt;
    try {
        stmt = conn.createStatement();

        // get from the DB the tuples content and their partitioning column
        String sqlstring = "SELECT tableid,g.tupleid,";
        for (SimpleCount sc : arraySc) {
            sqlstring += "s." + sc.colname + ", ";
        }

        sqlstring += "0 as partitionLabel" + " FROM " + "(SELECT tableid,tupleid," + pcol + " FROM `"
                + sampledtrainingtable + "` WHERE tableid = '" + tabname + "') AS g, relcloud_" + tabname
                + " AS s " + "WHERE s.relcloud_id = g.tupleid";

        System.out.println(sqlstring);

        ResultSet res = stmt.executeQuery(sqlstring);

        // create an instance from the resultset
        Instances data = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
        res.close();

        // prepare the data by setting the class attribute and sampling if required
        //data = sampleTraining(Double.parseDouble("2000"), data);

        System.out.println(data.toSummaryString());

        return data;

    } catch (SQLException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    return null;
}

From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());

        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create Weka's AddCluster filter, then set up either the hierarchical
    // or the k-means clusterer, depending on the number of instances.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}

From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create Weka's AddCluster filter, then set up either the hierarchical
    // or the k-means clusterer, depending on the number of instances.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}
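
Both variants call kmeans.setPreserveInstancesOrder(true) before building the clusterer; that is what allows the per-instance cluster assignments to be read back in input order afterwards, as in this minimal sketch (assuming the k-means branch above was taken):

// getAssignments() throws an exception unless setPreserveInstancesOrder(true)
// was called before buildClusterer(); indices follow the input order.
int[] assignments = kmeans.getAssignments();
for (int j = 0; j < assignments.length; j++)
    System.out.println("event " + j + " -> cluster " + assignments[j]);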

From source file:eu.cassandra.utils.Utils.java

License:Apache License

/**
 * This function is used in order to create clusters of points of interest
 * based on the active power difference they have.
 * 
 * @param pois
 *          The list of points of interest that will be clustered.
 * @return The newly created clusters with the points that are comprising
 *         them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {

        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));

        instances.add(inst);

    }

    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create Weka's AddCluster filter and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();

    SimpleKMeans kmeans = new SimpleKMeans();

    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster data set
    newInst = Filter.useFilter(instances, addcluster);

    // System.out.println(newInst.toString());

    // Parse through the dataset to see where each point is placed in the
    // clusters.
    for (int i = 0; i < newInst.size(); i++) {

        String cluster = newInst.get(i).stringValue(newInst.attribute(2));

        cluster = cluster.replace("cluster", "");

        log.debug("Point of Interest: " + i + " Cluster: " + cluster);

        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sort each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sort all the clusters by their active power.

    Collections.sort(result, Constants.comp5);

    return result;
}

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDB.java

License:Open Source License

public static Instances findSimilarCases(final String attrname, final double x, final double y, final int year,
        final String season, final int gapSize, final int gapPosition, final boolean isDuringRising,
        final boolean hasDownstream, final boolean hasUpstream, final String flow) throws Exception {
    /* build the current case */
    final StringBuilder newsb = new StringBuilder(DATABASE_AS_STRINGBUILDER);
    newsb.append(attrname).append(",").append(x).append(",").append(y).append(",")

            .append(gapSize).append(",").append(gapPosition).append(",")

            .append(season).append(",").append(year).append(",")

            .append(isDuringRising).append(",").append(flow).append(",")

            .append(hasDownstream).append(",").append(hasUpstream).append(",")

            .append("?").append(",").append("?").append(",").append("?").append(",").append("?").append(",")
            .append("?").append(",").append("?").append(",").append(0) // MAE
            .append(",").append(0) // RMSE
            .append(",").append(0) // RSR
            .append(",").append(0) // PBIAS
            .append(",").append(1) // NS
            .append(",").append(1) // IOA
            .append(",").append(true) // BEST SOLUTION          
            .append("\n");
    final Instances tmpDB = WekaDataAccessUtil.loadInstancesFromCSVString(newsb.toString(), false);

    final Instance newcase = tmpDB.instance(tmpDB.numInstances() - 1);

    /* compute NN for the current case */
    final Instances knn = WekaMachineLearningUtil.computeNearestNeighbours(tmpDB, newcase, 10,
            "2,3,4,6,7,8,9,10,23");
    knn.add(0, newcase);

    System.out.println(knn.toSummaryString());

    return knn;

}