Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances() 

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:com.reactivetechnologies.analytics.core.eval.StackingWithBuiltClassifiers.java

License:Open Source License

/**
 * Generates the meta data/*from  www . j a  v  a2  s . c  o  m*/
 *
 * @param newData the data to work on
 * @param random the random number generator to use for cross-validation
 * @throws Exception if generation fails
 */
@Override
protected void generateMetaLevel(Instances newData, Random random) throws Exception {

    Instances metaData = metaFormat(newData);
    m_MetaFormat = new Instances(metaData, 0);
    for (int j = 0; j < m_NumFolds; j++) {

        /** Changed here */
        //Instances train = newData.trainCV(m_NumFolds, j, random);
        // DO NOT Build base classifiers
        /*for (int i = 0; i < m_Classifiers.length; i++) {
            getClassifier(i).buildClassifier(train);
        }*/
        /** End change */

        // Classify test instances and add to meta data
        Instances test = newData.testCV(m_NumFolds, j);
        for (int i = 0; i < test.numInstances(); i++) {
            metaData.add(metaInstance(test.instance(i)));
        }
    }

    m_MetaClassifier.buildClassifier(metaData);
}

From source file:com.relationalcloud.main.Explanation.java

License:Open Source License

/**
 * @param args/*from w w w . j  a  v a  2s  .co  m*/
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");

    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");

    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);

    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {

        // CREATE A DB CONNEctioN
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }
            // INSTANTIATE THE CLASSIFIER
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;
            // ATTRIBUTE FILTERING
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();

                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();

                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    if (classifier.measureNumLeaves() == 1) {

                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }

                    }

                    // POPULATING THE justifiedpartition column with the result of this
                    // classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }

                } else { // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");

        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }

        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Builds the supplied WEKA classifier on the given training data, printing a
 * dataset summary before the build and the elapsed build time plus the model
 * afterwards.
 *
 * @param newData training instances (class attribute must already be set)
 * @param classifier the classifier to build
 * @throws Exception if the classifier cannot handle the dataset or the build fails
 */
public static void trainClassifier(Instances newData, Classifier classifier) throws Exception {
    // Fail fast if the classifier's capabilities do not cover this dataset.
    classifier.getCapabilities().testWithFail(newData);

    System.out.println("BUILDING CLASSIFIER ON INSTANCE:" + newData.toSummaryString());

    final long startMillis = System.currentTimeMillis();
    classifier.buildClassifier(newData); // build classifier
    final long elapsedMillis = System.currentTimeMillis() - startMillis;

    System.out.println("CLASSIFIER BUILDING TIME: " + elapsedMillis + "ms FROM: "
            + newData.numInstances() + " instances \n" + classifier.toString());
}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Applies resampling to reduce the input Instances to (approximately) the
 * given size; datasets already at or below the threshold are returned as-is.
 *
 * @param classifier_sampling_threshold maximum number of instances to keep
 * @param newData1 the dataset to (possibly) downsample
 * @return the resampled dataset, or {@code newData1} unchanged when small enough
 * @throws Exception if the Resample filter rejects the options or the data
 */
public static Instances sampleTraining(Double classifier_sampling_threshold, Instances newData1)
        throws Exception {
    // Guard clause: nothing to do when the dataset is already small enough.
    if (newData1.numInstances() <= classifier_sampling_threshold) {
        return newData1;
    }

    // Percentage of instances to retain so the result lands on the threshold.
    double percentage = (double) 100 * ((double) classifier_sampling_threshold)
            / ((double) newData1.numInstances());

    Resample resample = new Resample();
    // -C last: class attribute is the last one; -Z: subsample percentage.
    resample.setOptions(new String[] { "-C", "last", "-Z", "" + percentage });
    resample.setInputFormat(newData1);
    return Filter.useFilter(newData1, resample);
}

From source file:com.sliit.normalize.NormalizeDataset.java

/**
 * Normalizes the current dataset (preferring the reduced-dimension file,
 * then the temp file, then the original CSV) with WEKA's Normalize filter,
 * renames the last attribute's value "normal." to "0" and all other values
 * to "1", writes the result next to the source CSV, and cleans up the
 * intermediate files.
 *
 * @return the path of the normalized output file, or "" on error
 */
public String normalizeDataset() {
    System.out.println("start normalizing data");

    String filePathOut = "";
    try {
        // Pick the most-processed input file available.
        CSVLoader loader = new CSVLoader();
        if (reducedDiemensionFile != null) {
            loader.setSource(reducedDiemensionFile);
        } else {
            if (tempFIle != null && tempFIle.exists()) {
                loader.setSource(tempFIle);
            } else {
                loader.setSource(csvFile);
            }
        }
        Instances dataInstance = loader.getDataSet();
        Normalize normalize = new Normalize();
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        normalize.setInputFormat(dataInstance);
        String directory = csvFile.getParent();
        outputFile = new File(directory + "/" + "normalized" + csvFile.getName());
        if (!outputFile.exists()) {
            outputFile.createNewFile();
        }
        CSVSaver saver = new CSVSaver();
        saver.setFile(outputFile);
        // BUG FIX: both loops previously started at i = 1, silently dropping
        // the first instance — WEKA instance indices are 0-based.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            normalize.input(dataInstance.instance(i));
        }
        normalize.batchFinished();
        Instances outPut = new Instances(dataInstance, 0);
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            outPut.add(normalize.output());
        }
        // Binarize the last attribute's labels: "normal." -> "0", else "1".
        Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1);
        for (int j = 0; j < attribute.numValues(); j++) {
            if (attribute.value(j).equals("normal.")) {
                outPut.renameAttributeValue(attribute, attribute.value(j), "0");
            } else {
                outPut.renameAttributeValue(attribute, attribute.value(j), "1");
            }
        }
        saver.setInstances(outPut);
        saver.writeBatch();
        writeToNewFile(directory);
        // NOTE(review): the returned path is directory + "norm" + name (no
        // separator), while the file saved above is "/normalized" + name —
        // presumably writeToNewFile() creates this path; confirm before changing.
        filePathOut = directory + "norm" + csvFile.getName();
        if (tempFIle != null) {
            tempFIle.delete();
        }
        if (reducedDiemensionFile != null) {
            reducedDiemensionFile.delete();
        }
        outputFile.delete();
    } catch (IOException e) {
        log.error("Error occurred:" + e.getMessage());
    } catch (Exception e) {
        log.error("Error occurred:" + e.getMessage());
    }
    return filePathOut;
}

From source file:com.sliit.normalize.NormalizeDataset.java

/**
 * Renames every value of the attributes whose index appears as a key in
 * {@code values} to its positional index ("0", "1", ...), then saves the
 * dataset to {@code temp.csv} next to the source file.
 *
 * @param values map whose keys select the attribute indices to rewrite
 *               (the mapped strings are not read by this method)
 * @return true when the temp file was written successfully, false otherwise
 */
public boolean updateStringValues(Map<Integer, String> values) {
    System.out.println("updating String Values");

    boolean status = false;
    try {
        csv.setSource(csvFile);
        Instances dataInstance = csv.getDataSet();
        // NOTE(review): the bound is numInstances() but i is used as an
        // ATTRIBUTE index below — numAttributes() looks intended; confirm
        // with callers before changing, as current behavior may be relied on.
        for (int i = 0; i < dataInstance.numInstances(); i++) {

            if (values.containsKey(i)) {

                Attribute attribute = dataInstance.attribute(i);
                for (int j = 0; j < attribute.numValues(); j++) {
                    dataInstance.renameAttributeValue(attribute, attribute.value(j), j + "");
                }
            }
        }
        tempFIle = new File(csvFile.getParent() + "/temp.csv");
        CSVSaver saver = new CSVSaver();
        saver.setInstances(dataInstance);
        saver.setFile(tempFIle);
        saver.writeBatch();
        // BUG FIX: status was never updated, so the method always returned
        // false even on success.
        status = true;
    } catch (IOException e) {

        log.error("Error occurred:" + e.getMessage());
    }
    return status;
}

From source file:com.sliit.normalize.NormalizeDataset.java

/**
 * Reduces the dataset to at most 10 attributes via Gaussian random
 * projection, writing the projected instances to {@code tempwhite.csv};
 * datasets with 10 or fewer attributes are left untouched.
 *
 * @return the number of attributes after (possible) projection, or 0 on error
 */
public int whiteningData() {
    System.out.println("whiteningData");

    int nums = 0;
    try {
        // Prefer the temp file produced by earlier processing, if present.
        if (tempFIle != null && tempFIle.exists()) {
            csv.setSource(tempFIle);
        } else {
            csv.setSource(csvFile);
        }
        Instances instances = csv.getDataSet();
        if (instances.numAttributes() > 10) {
            instances.setClassIndex(instances.numAttributes() - 1);
            RandomProjection random = new RandomProjection();
            random.setDistribution(
                    new SelectedTag(RandomProjection.GAUSSIAN, RandomProjection.TAGS_DSTRS_TYPE));
            // BUG FIX: configure the filter ONCE, before setInputFormat().
            // Previously these loop-invariant setters ran inside the loop,
            // after the input format (and thus the output format) was fixed.
            random.setNumberOfAttributes(10);
            random.setReplaceMissingValues(true);
            reducedDiemensionFile = new File(csvFile.getParent() + "/tempwhite.csv");
            if (!reducedDiemensionFile.exists()) {

                reducedDiemensionFile.createNewFile();
            }
            random.setInputFormat(instances);
            // try-with-resources: the writer was previously leaked if any
            // filter call threw mid-loop.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(reducedDiemensionFile))) {
                for (int i = 0; i < instances.numInstances(); i++) {
                    random.input(instances.instance(i));
                    writer.write(random.output().toString());
                    writer.newLine();
                }
                writer.flush();
            }
            nums = random.getNumberOfAttributes();
        } else {

            nums = instances.numAttributes();
        }
    } catch (IOException e) {

        log.error("Error occurred:" + e.getMessage());
    } catch (Exception e) {

        log.error("Error occurred:" + e.getMessage());
    }
    return nums;
}

From source file:com.sliit.views.DataVisualizerPanel.java

/**
 * Trains a NaiveBayes classifier with 10-fold cross-validation on the
 * dataset named in the path text field, computes its ROC curve, and displays
 * it in a new frame.
 */
void getRocCurve() {
    try {
        Instances data;
        // BUG FIX: the reader was never closed — try-with-resources releases
        // the file handle as soon as the dataset is loaded.
        try (BufferedReader reader = new BufferedReader(new FileReader(datasetPathText.getText()))) {
            data = new Instances(reader);
        }
        data.setClassIndex(data.numAttributes() - 1);

        // train classifier
        Classifier cl = new NaiveBayes();
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(cl, data, 10, new Random(1));

        // generate curve
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);

        // plot curve
        ThresholdVisualizePanel vmc = new ThresholdVisualizePanel();
        vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")");
        vmc.setName(result.relationName());
        PlotData2D tempd = new PlotData2D(result);
        tempd.setPlotName(result.relationName());
        tempd.addInstanceNumberAttribute();
        // specify which points are connected (index 0 deliberately stays false)
        boolean[] cp = new boolean[result.numInstances()];
        for (int n = 1; n < cp.length; n++) {
            cp[n] = true;
        }
        tempd.setConnectPoints(cp);
        // add plot
        vmc.addPlot(tempd);

        // display curve
        String plotName = vmc.getName();
        final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Classifier Visualize: " + plotName);
        jf.setSize(500, 400);
        jf.getContentPane().setLayout(new BorderLayout());
        jf.getContentPane().add(vmc, BorderLayout.CENTER);
        jf.addWindowListener(new java.awt.event.WindowAdapter() {
            public void windowClosing(java.awt.event.WindowEvent e) {
                jf.dispose();
            }
        });
        jf.setVisible(true);
    } catch (IOException ex) {
        Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.sliit.views.KNNView.java

/**
 * Trains a NaiveBayes classifier with 10-fold cross-validation on the model
 * dataset named in {@code PredictorPanel.modalText}, computes its ROC curve,
 * and renders it into {@code rocPanel}.
 */
void getRocCurve() {
    try {
        Instances data;
        // BUG FIX: the reader was never closed — try-with-resources releases
        // the file handle as soon as the dataset is loaded.
        try (BufferedReader reader = new BufferedReader(
                new java.io.FileReader(PredictorPanel.modalText.getText()))) {
            data = new Instances(reader);
        }
        data.setClassIndex(data.numAttributes() - 1);

        // train classifier
        Classifier cl = new NaiveBayes();
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(cl, data, 10, new Random(1));

        // generate curve
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);

        // plot curve
        ThresholdVisualizePanel vmc = new ThresholdVisualizePanel();
        vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")");
        vmc.setName(result.relationName());
        PlotData2D tempd = new PlotData2D(result);
        tempd.setPlotName(result.relationName());
        tempd.addInstanceNumberAttribute();
        // specify which points are connected (index 0 deliberately stays false)
        boolean[] cp = new boolean[result.numInstances()];
        for (int n = 1; n < cp.length; n++) {
            cp[n] = true;
        }
        tempd.setConnectPoints(cp);
        // add plot
        vmc.addPlot(tempd);

        rocPanel.removeAll();
        rocPanel.add(vmc, "vmc", 0);
        rocPanel.revalidate();

    } catch (IOException ex) {
        Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.sliit.views.SVMView.java

/**
 * draw ROC curve/*from  w  w w  .j ava  2  s.  c o  m*/
 */
void getRocCurve() {
    try {
        Instances data;
        data = new Instances(new BufferedReader(new FileReader(PredictorPanel.modalText.getText())));
        data.setClassIndex(data.numAttributes() - 1);

        //train classifier
        Classifier cl = new NaiveBayes();
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(cl, data, 10, new Random(1));

        // generate curve
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);

        // plot curve
        ThresholdVisualizePanel vmc = new ThresholdVisualizePanel();
        vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")");
        vmc.setName(result.relationName());
        PlotData2D tempd = new PlotData2D(result);
        tempd.setPlotName(result.relationName());
        tempd.addInstanceNumberAttribute();
        // specify which points are connected
        boolean[] cp = new boolean[result.numInstances()];
        for (int n = 1; n < cp.length; n++) {
            cp[n] = true;
        }
        tempd.setConnectPoints(cp);
        // add plot
        vmc.addPlot(tempd);

        //            rocPanel.removeAll();
        //            rocPanel.add(vmc, "vmc", 0);
        //            rocPanel.revalidate();
    } catch (IOException ex) {
        Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex);
    }
}