Example usage for weka.core Instances testCV

List of usage examples for weka.core Instances testCV

Introduction

In this page you can find the example usage for weka.core Instances testCV.

Prototype



public Instances testCV(int numFolds, int numFold) 

Source Link

Document

Creates the test set for one fold of a cross-validation on the dataset.

Usage

From source file:mulan.evaluation.Evaluator.java

License:Open Source License

private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner, MultiLabelInstances data,
        boolean hasMeasures, List<Measure> measures, int someFolds) {
    Evaluation[] evaluation = new Evaluation[someFolds];

    Instances workingSet = new Instances(data.getDataSet());
    workingSet.randomize(new Random(seed));
    for (int i = 0; i < someFolds; i++) {
        System.out.println("Fold " + (i + 1) + "/" + someFolds);
        try {//from   ww  w  .  ja va 2 s.  c o  m
            Instances train = workingSet.trainCV(someFolds, i);
            Instances test = workingSet.testCV(someFolds, i);
            MultiLabelInstances mlTrain = new MultiLabelInstances(train, data.getLabelsMetaData());
            MultiLabelInstances mlTest = new MultiLabelInstances(test, data.getLabelsMetaData());
            MultiLabelLearner clone = learner.makeCopy();
            clone.build(mlTrain);
            if (hasMeasures)
                evaluation[i] = evaluate(clone, mlTest, measures);
            else
                evaluation[i] = evaluate(clone, mlTest);
        } catch (Exception ex) {
            Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return new MultipleEvaluation(evaluation);
}

From source file:net.sf.bddbddb.order.WekaInterface.java

License:LGPL

public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds)
        return Double.NaN; //more folds than elements
    if (numFolds == 0)
        return Double.NaN; // no folds
    if (data0.numInstances() == 0)
        return 0; //no instances

    Instances data = new Instances(data0);
    //data.randomize(new Random(System.currentTimeMillis()));
    data.stratify(numFolds);//from   w ww .  j av  a  2s .  c  o  m
    Assert._assert(data.classAttribute() != null);
    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");

        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");

        int temp = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier = buildClassifier(cClassName, trainData);
        FindBestDomainOrder.TRACE = temp;
        int count = testData.numInstances();
        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue())
                    loss += weight;
                sum += weight;
            } catch (Exception ex) {
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        estimates[i] = 1 - loss / sum;
    }
    double average = 0;
    for (int i = 0; i < numFolds; ++i)
        average += estimates[i];

    return average / numFolds;
}

From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierMultiAttribute.java

License:Open Source License

@Override
public void execute(Graph g) {
    Actor[] source = g.getActor((String) parameter[1].getValue());
    if (source != null) {

        // create the atributes for each artist
        FastVector sourceTypes = new FastVector();
        Actor[] dest = g.getActor((String) parameter[3].getValue());
        if (dest != null) {
            // create the Instances set backing this object
            Instances masterSet = null;
            Instance[] trainingData = new Instance[source.length];
            for (int i = 0; i < source.length; ++i) {
                // First, acquire the instance objects for each actor
                Property p = null;/*from   w ww  .java2 s .  co  m*/
                if ((Boolean) parameter[10].getValue()) {
                    p = source[i].getProperty((String) parameter[2].getValue() + g.getID());
                } else {
                    p = source[i].getProperty((String) parameter[2].getValue());
                }
                if (p != null) {
                    Object[] values = p.getValue();
                    if (values.length > 0) {
                        sourceTypes.addElement(source[i].getID());
                        trainingData[i] = (Instance) ((Instance) values[0]).copy();
                        // assume that this Instance has a backing dataset 
                        // that contains all Instance objects to be tested
                        if (masterSet == null) {
                            masterSet = new Instances(trainingData[i].dataset(), source.length);
                        }
                        masterSet.add(trainingData[i]);
                        sourceTypes.addElement(source[i].getID());
                    } else {
                        trainingData[i] = null;
                        Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                                "Actor " + source[i].getType() + ":" + source[i].getID()
                                        + " does not have an Instance value of property ID " + p.getType());
                    }
                } else {
                    trainingData[i] = null;
                    Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                            "Actor " + source[i].getType() + ":" + source[i].getID()
                                    + " does not have a property of ID " + p.getType());
                }
            }

            Vector<Attribute> destVector = new Vector<Attribute>();
            for (int i = 0; i < dest.length; ++i) {
                FastVector type = new FastVector();
                type.addElement("false");
                type.addElement("true");
                Attribute tmp = new Attribute(dest[i].getID(), type);
                destVector.add(tmp);
                masterSet.insertAttributeAt(tmp, masterSet.numAttributes());
            }
            Attribute sourceID = new Attribute("sourceID", sourceTypes);
            masterSet.insertAttributeAt(sourceID, masterSet.numAttributes());

            //set ground truth for evaluation
            for (int i = 0; i < masterSet.numInstances(); ++i) {
                Instance inst = masterSet.instance(i);
                Actor user = g.getActor((String) parameter[i].getValue(),
                        sourceID.value((int) inst.value(sourceID)));
                if (user != null) {
                    for (int j = 0; j < dest.length; ++j) {
                        if (g.getLink((String) parameter[4].getValue(), user, dest[j]) != null) {
                            inst.setValue(sourceID, "true");
                        } else {
                            if ((Boolean) parameter[9].getValue()) {
                                inst.setValue(sourceID, "false");
                            } else {
                                inst.setValue(sourceID, Double.NaN);
                            }
                        }
                    }
                } else {
                    Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE,
                            "Actor " + sourceID.value((int) inst.value(sourceID)) + " does not exist in graph");
                }
            }

            // perform cross fold evaluation of each classifier in turn
            String[] opts = ((String) parameter[9].getValue()).split("\\s+");
            Properties props = new Properties();
            if ((Boolean) parameter[11].getValue()) {
                props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID());
            } else {
                props.setProperty("LinkType", (String) parameter[5].getValue());
            }
            props.setProperty("LinkClass", "Basic");
            try {
                for (int destCount = 0; destCount < dest.length; ++destCount) {
                    masterSet.setClass(destVector.get(destCount));
                    for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) {
                        Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i);
                        Instances train = masterSet.testCV((Integer) parameter[8].getValue(), i);
                        Classifier classifier = (Classifier) ((Class) parameter[7].getValue()).newInstance();
                        classifier.setOptions(opts);
                        classifier.buildClassifier(train);
                        for (int j = 0; j < test.numInstances(); ++j) {
                            String sourceName = sourceID.value((int) test.instance(j).value(sourceID));
                            double result = classifier.classifyInstance(test.instance(j));
                            String predicted = masterSet.classAttribute().value((int) result);
                            Link derived = LinkFactory.newInstance().create(props);
                            derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0,
                                    g.getActor((String) parameter[3].getValue(), predicted));
                            g.add(derived);
                        }
                    }
                }
            } catch (InstantiationException ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IllegalAccessException ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            }

        } else { // dest==null
            Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                    "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors");
        }
    } else { // source==null
        Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                "Source mode '" + (String) parameter[2].getValue() + "' has no actors");
    }
}

From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierOneAttribute.java

License:Open Source License

@Override
public void execute(Graph g) {
    Actor[] source = g.getActor((String) parameter[1].getValue());
    if (source != null) {

        // create the Instance sets for each ac
        FastVector classTypes = new FastVector();
        FastVector sourceTypes = new FastVector();
        Actor[] dest = g.getActor((String) parameter[3].getValue());
        if (dest != null) {
            for (int i = 0; i < dest.length; ++i) {
                classTypes.addElement(dest[i].getID());
            }/*from  w w  w. java  2  s  .c  o  m*/
            Attribute classAttribute = new Attribute((String) parameter[5].getValue(), classTypes);

            Instance[] trainingData = new Instance[source.length];
            Instances masterSet = null;
            for (int i = 0; i < source.length; ++i) {

                // First, acquire the instance objects for each actor
                Property p = null;
                if ((Boolean) parameter[9].getValue()) {
                    p = source[i].getProperty((String) parameter[2].getValue() + g.getID());
                } else {
                    p = source[i].getProperty((String) parameter[2].getValue());
                }
                if (p != null) {
                    Object[] values = p.getValue();
                    if (values.length > 0) {
                        sourceTypes.addElement(source[i].getID());
                        trainingData[i] = (Instance) ((Instance) values[0]).copy();
                        // assume that this Instance has a backing dataset 
                        // that contains all Instance objects to be tested
                        if (masterSet == null) {
                            masterSet = new Instances(trainingData[i].dataset(), source.length);
                        }
                        masterSet.add(trainingData[i]);
                    } else {
                        trainingData[i] = null;
                        Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                                "Actor " + source[i].getType() + ":" + source[i].getID()
                                        + " does not have an Instance value of property ID " + p.getType());
                    }
                } else {
                    trainingData[i] = null;
                    Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                            "Actor " + source[i].getType() + ":" + source[i].getID()
                                    + " does not have a property of ID " + p.getType());
                }

            } // for every actor, fix the instance
            Attribute sourceID = new Attribute("sourceID", sourceTypes);
            masterSet.insertAttributeAt(sourceID, masterSet.numAttributes());
            masterSet.insertAttributeAt(classAttribute, masterSet.numAttributes());
            masterSet.setClass(classAttribute);
            for (int i = 0; i < source.length; ++i) {
                if (trainingData[i] != null) {
                    trainingData[i].setValue(sourceID, source[i].getID());
                    Link[] link = g.getLinkBySource((String) parameter[4].getValue(), source[i]);
                    if (link == null) {
                        trainingData[i].setClassValue(Double.NaN);
                    } else {
                        trainingData[i].setClassValue(link[0].getDestination().getID());
                    }
                }
            }

            String[] opts = ((String) parameter[7].getValue()).split("\\s+");
            Properties props = new Properties();
            if ((Boolean) parameter[10].getValue()) {
                props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID());
            } else {
                props.setProperty("LinkType", (String) parameter[5].getValue());
            }
            props.setProperty("LinkClass", "Basic");
            try {
                for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) {
                    Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i);
                    Instances train = masterSet.testCV((Integer) parameter[8].getValue(), i);
                    Classifier classifier = (Classifier) ((Class) parameter[6].getValue()).newInstance();
                    classifier.setOptions(opts);
                    classifier.buildClassifier(train);
                    for (int j = 0; j < test.numInstances(); ++j) {
                        String sourceName = sourceID.value((int) test.instance(j).value(sourceID));
                        double result = classifier.classifyInstance(test.instance(j));
                        String predicted = masterSet.classAttribute().value((int) result);
                        Link derived = LinkFactory.newInstance().create(props);
                        derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0,
                                g.getActor((String) parameter[3].getValue(), predicted));
                        g.add(derived);
                    }
                }
            } catch (InstantiationException ex) {
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IllegalAccessException ex) {
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex);
            }

        } else { // dest==null
            Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                    "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors");
        }
    } else { // source==null
        Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                "Source mode '" + (String) parameter[2].getValue() + "' has no actors");
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Builds classifier.//www  . java2s.  com
 * 
 * @param data
 *            the data to train with
 * @throws Exception
 *             if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }

    //Set Default Instances for selection.
    setRequiredInst(data);

    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    Instances requiredInstances = getRequiredInst();
    // Build tree
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0, m_distributionData,
                requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);
    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}

From source file:semana07.IrisKnn.java

public static void main(String[] args) throws FileNotFoundException, IOException, Exception {

    // DEFININDO CONJUNTO DE TREINAMENTO

    // - Definindo o leitor do arquivo arff

    FileReader baseIris = new FileReader("iris.arff");
    // - Definindo o grupo de instancias a partir do arquivo "simpsons.arff"

    Instances iris = new Instances(baseIris);

    // - Definindo o indice do atributo classe

    iris.setClassIndex(4);// w w w . j av a 2s.  c o m

    iris = iris.resample(new Debug.Random());

    Instances irisTreino = iris.trainCV(3, 0);
    Instances irisTeste = iris.testCV(3, 0);

    // DEFININDO EXEMPLO DESCONHECIDO

    //5.9,3.0,5.1,1.8,Iris-virginica
    Instance irisInst = new DenseInstance(iris.numAttributes());
    irisInst.setDataset(iris);
    irisInst.setValue(0, 5.9);
    irisInst.setValue(1, 3.0);
    irisInst.setValue(2, 5.1);
    irisInst.setValue(3, 1.8);

    // DEFININDO ALGORITMO DE CLASSIFICAO

    //NN

    IBk vizinhoIris = new IBk();

    //kNN

    IBk knnIris = new IBk(3);

    // MONTANDO CLASSIFICADOR
    //NN

    vizinhoIris.buildClassifier(irisTreino);

    //kNN

    knnIris.buildClassifier(irisTreino);

    // Definindo arquivo a ser escrito
    FileWriter writer = new FileWriter("iris.csv");

    // Escrevendo o cabealho do arquivo
    writer.append("Classe Real;Resultado NN;Resultado kNN");
    writer.append(System.lineSeparator());

    // Sada CLI / Console
    System.out.println("Classe Real;Resultado NN;Resultado kNN"); //Cabealho
    for (int i = 0; i <= irisTeste.numInstances() - 1; i++) {

        Instance testeIris = irisTeste.instance(i);

        // Sada CLI / Console do valor original
        System.out.print(testeIris.stringValue(4) + ";");

        // Escrevendo o valor original no arquivo
        writer.append(testeIris.stringValue(4) + ";");

        // Definindo o atributo classe como indefinido
        testeIris.setClassMissing();

        // CLASSIFICANDO A INSTANCIA
        // NN

        double respostaVizinho = vizinhoIris.classifyInstance(testeIris);
        testeIris.setValue(4, respostaVizinho);
        String stringVizinho = testeIris.stringValue(4);

        //kNN

        double respostaKnn = knnIris.classifyInstance(testeIris);

        // Atribuindo respota ao valor do atributo do index 4(classe)

        testeIris.setValue(4, respostaKnn);

        String stringKnn = testeIris.stringValue(4);
        // Adicionando resultado ao grupo de instancia iris

        iris.add(irisInst);

        //Escrevendo os resultados no arquivo iris.csv

        writer.append(stringVizinho + ";");
        writer.append(stringKnn + ";");
        writer.append(System.lineSeparator());

        // Exibindo via CLI / Console o resultado

        System.out.print(respostaVizinho + ";");
        System.out.print(respostaKnn + ";");
        System.out.println(testeIris.stringValue(4));
    }

    writer.flush();
    writer.close();

}

From source file:sentinets.Prediction.java

License:Open Source License

public String updateModel(String inputFile, ArrayList<Double[]> metrics) {
    String output = "";
    this.setInstances(inputFile);
    FilteredClassifier fcls = (FilteredClassifier) this.cls;
    SGD cls = (SGD) fcls.getClassifier();
    Filter filter = fcls.getFilter();
    Instances insAll;/*from  ww  w .j  a  va2s  .  c  om*/
    try {
        insAll = Filter.useFilter(this.unlabled, filter);
        if (insAll.size() > 0) {
            Random rand = new Random(10);
            int folds = 10 > insAll.size() ? 2 : 10;
            Instances randData = new Instances(insAll);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal()) {
                randData.stratify(folds);
            }
            Evaluation eval = new Evaluation(randData);
            eval.evaluateModel(cls, insAll);
            System.out.println("Initial Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            output += "\n====" + "Initial Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            System.out.println("Cross Validated Evaluation");
            output += "\n====" + "Cross Validated Evaluation" + "====\n";
            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                for (int i = 0; i < train.numInstances(); i++) {
                    cls.updateClassifier(train.instance(i));
                }

                eval.evaluateModel(cls, test);
                System.out.println("Cross Validated Evaluation fold: " + n);
                output += "\n====" + "Cross Validated Evaluation fold (" + n + ")====\n";
                System.out.println(eval.toSummaryString());
                System.out.println(eval.toClassDetailsString());
                output += "\n" + eval.toSummaryString();
                output += "\n" + eval.toClassDetailsString();
                metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            }
            for (int i = 0; i < insAll.numInstances(); i++) {
                cls.updateClassifier(insAll.instance(i));
            }
            eval.evaluateModel(cls, insAll);
            System.out.println("Final Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            output += "\n====" + "Final Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            fcls.setClassifier(cls);
            String modelFilePath = outputDir + "/" + Utils.getOutDir(Utils.OutDirIndex.MODELS)
                    + "/updatedClassifier.model";
            weka.core.SerializationHelper.write(modelFilePath, fcls);
            output += "\n" + "Updated Model saved at: " + modelFilePath;
        } else {
            output += "No new instances for training the model.";
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return output;
}

From source file:sirius.clustering.main.ClustererClassificationPane.java

License:Open Source License

private void start() {
    //Run Classifier
    if (this.inputDirectoryTextField.getText().length() == 0) {
        JOptionPane.showMessageDialog(parent, "Please set Input Directory to where the clusterer output are!",
                "Evaluate Classifier", JOptionPane.ERROR_MESSAGE);
        return;/*from w  w  w  .  ja  va2 s  . c  o m*/
    }
    if (m_ClassifierEditor.getValue() == null) {
        JOptionPane.showMessageDialog(parent, "Please choose Classifier!", "Evaluate Classifier",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (validateStatsSettings(1) == false) {
        return;
    }
    if (this.clusteringClassificationThread == null) {
        startButton.setEnabled(false);
        stopButton.setEnabled(true);
        tabbedClassifierPane.setSelectedIndex(0);
        this.clusteringClassificationThread = (new Thread() {
            public void run() {
                //Clear the output text area
                levelOneClassifierOutputTextArea.setText("");
                resultsTableModel.reset();
                //double threshold = Double.parseDouble(classifierOneThresholdTextField.getText());                
                //cross-validation
                int numFolds;
                if (jackKnifeRadioButton.isSelected())
                    numFolds = -1;
                else
                    numFolds = Integer.parseInt(foldsField.getText());
                StringTokenizer st = new StringTokenizer(inputDirectoryTextField.getText(), File.separator);
                String filename = "";
                while (st.hasMoreTokens()) {
                    filename = st.nextToken();
                }
                StringTokenizer st2 = new StringTokenizer(filename, "_.");
                numOfCluster = 0;
                if (st2.countTokens() >= 2) {
                    st2.nextToken();
                    String numOfClusterString = st2.nextToken().replaceAll("cluster", "");
                    try {
                        numOfCluster = Integer.parseInt(numOfClusterString);
                    } catch (NumberFormatException e) {
                        JOptionPane.showMessageDialog(parent,
                                "Please choose the correct file! (Output from Utilize Clusterer)", "ERROR",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
                Classifier template = (Classifier) m_ClassifierEditor.getValue();

                for (int x = 0; x <= numOfCluster && clusteringClassificationThread != null; x++) {//Test each cluster
                    try {
                        long totalTimeStart = 0, totalTimeElapsed = 0;
                        totalTimeStart = System.currentTimeMillis();
                        statusLabel.setText("Reading in cluster" + x + " file..");
                        String inputFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".arff");
                        String outputScoreFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        BufferedWriter output = new BufferedWriter(new FileWriter(outputScoreFilename));
                        Instances inst = new Instances(new FileReader(inputFilename));
                        //Assume that class attribute is the last attribute - This should be the case for all Sirius produced Arff files
                        inst.setClassIndex(inst.numAttributes() - 1);
                        Random random = new Random(1);//Simply set to 1, shall implement the random seed option later
                        inst.randomize(random);
                        if (inst.attribute(inst.classIndex()).isNominal())
                            inst.stratify(numFolds);
                        // for timing
                        ClassifierResults classifierResults = new ClassifierResults(false, 0);
                        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
                        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ",
                                classifierName);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                                inputFilename);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                                "NA");
                        //ArrayList<Double> resultList = new ArrayList<Double>();     
                        if (jackKnifeRadioButton.isSelected() || numFolds > inst.numInstances() - 1)
                            numFolds = inst.numInstances() - 1;
                        for (int fold = 0; fold < numFolds && clusteringClassificationThread != null; fold++) {//Doing cross-validation          
                            statusLabel.setText("Cluster: " + x + " - Training Fold " + (fold + 1) + "..");
                            Instances train = inst.trainCV(numFolds, fold, random);
                            Classifier current = null;
                            try {
                                current = Classifier.makeCopy(template);
                                current.buildClassifier(train);
                                Instances test = inst.testCV(numFolds, fold);
                                statusLabel.setText("Cluster: " + x + " - Testing Fold " + (fold + 1) + "..");
                                for (int jj = 0; jj < test.numInstances(); jj++) {
                                    double[] result = current.distributionForInstance(test.instance(jj));
                                    output.write("Cluster: " + x);
                                    output.newLine();
                                    output.newLine();
                                    output.write(test.instance(jj).stringValue(test.classAttribute()) + ",0="
                                            + result[0]);
                                    output.newLine();
                                }
                            } catch (Exception ex) {
                                ex.printStackTrace();
                                statusLabel.setText("Error in cross-validation!");
                                startButton.setEnabled(true);
                                stopButton.setEnabled(false);
                            }
                        }
                        output.close();
                        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
                        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2)
                                        + " seconds");
                        double threshold = validateFieldAsThreshold(classifierOneThresholdTextField.getText(),
                                "Threshold Field", classifierOneThresholdTextField);
                        String filename2 = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        PredictionStats classifierStats = new PredictionStats(filename2, 0, threshold);

                        resultsTableModel.add("Cluster " + x, classifierResults, classifierStats);
                        resultsTable.setRowSelectionInterval(x, x);
                        computeStats(numFolds);//compute and display the results                    
                    } catch (Exception e) {
                        e.printStackTrace();
                        statusLabel.setText("Error in reading file!");
                        startButton.setEnabled(true);
                        stopButton.setEnabled(false);
                    }
                } //end of cluster for loop

                resultsTableModel.add("Summary - Equal Weightage", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 1, numOfCluster + 1);
                computeStats(numFolds);
                resultsTableModel.add("Summary - Weighted Average", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 2, numOfCluster + 2);
                computeStats(numFolds);

                if (clusteringClassificationThread != null)
                    statusLabel.setText("Done!");
                else
                    statusLabel.setText("Interrupted..");
                startButton.setEnabled(true);
                stopButton.setEnabled(false);
                if (classifierOne != null) {
                    levelOneClassifierOutputScrollPane.getVerticalScrollBar()
                            .setValue(levelOneClassifierOutputScrollPane.getVerticalScrollBar().getMaximum());
                }
                clusteringClassificationThread = null;

            }
        });
        this.clusteringClassificationThread.setPriority(Thread.MIN_PRIORITY);
        this.clusteringClassificationThread.start();
    } else {
        JOptionPane.showMessageDialog(parent,
                "Cannot start new job as previous job still running. Click stop to terminate previous job",
                "ERROR", JOptionPane.ERROR_MESSAGE);
    }
}

From source file:src.BigDataClassifier.GenerateFolds.java

public void generateFolds(Instances trainDataset) throws Exception {

    //randomize data
    Random rand = new Random(1);
    //set folds/*from  w w w.j a  v a 2s .c  o  m*/
    int folds = 3;
    //create random dataset
    Instances randData = new Instances(trainDataset);
    randData.randomize(rand);

    Instances[] result = new Instances[folds * 2];
    //cross-validate
    for (int n = 0; n < folds; n++) {
        trainDataset = randData.trainCV(folds, n);
        System.out.println("Train dataset size is = " + trainDataset.size());
        Instances testDataset = randData.testCV(folds, n);
        System.out.println("Test dataset size is = " + testDataset.size());
        result[n] = trainDataset;
        result[n + 1] = testDataset;
        trainDataset2 = trainDataset;
        testDataset2 = testDataset;
    }
    trainDatasetSize = trainDataset2.size();
    testDatasetSize = testDataset2.size();
}

From source file:tubes.ml.pkg1.TubesML1.java

public void akses() throws Exception {
    Discretize filter;/* w w  w  . j a v a  2s. co m*/
    int fold = 10;
    int fold3 = 3;
    int trainNum, testNum;
    PrintWriter file = new PrintWriter("model.txt");

    /***dataset 1***/
    file.println("***DATASET 1***");
    fileReader tets = new fileReader("./src/data/iris.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    Instances data = tets.getData();
    filter = new Discretize();
    try {
        filter.setInputFormat(data);
    } catch (Exception ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }

    /*ID3*/
    Instances discreteData;
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;

    for (int i = 0; i < fold; i++) {
        try {

            Instances train = discreteData.trainCV(fold, i);
            Instances test = discreteData.testCV(fold, i);

            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*J48*/
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;
    J48 jKT = new J48();
    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                jKT.buildClassifier(data);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
            // System.out.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*dataset 2*/
    file.println("***DATASET 2***");
    tets.setFilepath("./src/data/weather.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    data = new Instances(tets.getData());

    /*ID3*/
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;

    for (int i = 0; i < fold3; i++) {
        try {
            Instances train = discreteData.trainCV(trainNum, i);
            Instances test = discreteData.testCV(testNum, i);

            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                //file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println(testNum);
    file.println("Test Number");
    file.println(testNum);

    /*J48*/
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;

    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                jKT.buildClassifier(data);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
            System.out.println(jKT.toString());
            file.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*dataset 3*/
    file.println("***DATASET 3***");
    tets.setFilepath("./src/data/weather.nominal.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    data = new Instances(tets.getData());

    /*ID3*/
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;

    for (int i = 0; i < fold3; i++) {
        try {
            Instances train = discreteData.trainCV(fold, i);
            Instances test = discreteData.testCV(fold, i);

            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println(testNum);
    file.println("Test Number");
    file.println(testNum);

    /*J48*/
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;

    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                jKT.buildClassifier(data);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
            System.out.println(jKT.toString());
            file.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /*RESULTT*/
    System.out.println(jKT.toString());
    file.println("RESULT");
    file.println(jKT.toString());
    file.close();
}