Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage for weka.core Instances Instances.

Prototype

public Instances(Instances dataset) 

Document

Constructor copying all instances and references to the header information from the given set of instances.
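As a minimal sketch of this constructor in action (the file name "iris.arff" is a placeholder path): the instance list is copied, while the header information is shared by reference, so removing instances from the copy does not affect the original.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CopyConstructorDemo {
    public static void main(String[] args) throws Exception {
        // load any ARFF file; "iris.arff" is a placeholder path
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // copy all instances; the header information is referenced, not deep-copied
        Instances copy = new Instances(data);

        // deleting from the copy leaves the original instance list untouched
        copy.delete(0);
        System.out.println(data.numInstances() + " vs " + copy.numInstances());
    }
}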

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // get fresh Instances object
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO

    buildClassifiers();
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        // refill inst with this tree's transducted instances and re-run induction
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();

        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[m_data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (m_inBag[j][i])
                    continue;

                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                if (voteCount == 0) {
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    Utils.normalize(votes);
                }
            }

            // error for instance
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    // note: relative error; this divides by the class value, unlike standard Bagging
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }

        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }

    // save memory
    m_data = null;
}

From source file:classifyfromimage.java

private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed
    this.name3 = IJ.getImage().getTitle();
    this.name4 = this.name3.replaceFirst("[.][^.]+$", "");
    System.out.println("hola " + this.name4);
    selectWindow(this.name3);
    System.out.println(this.name4);
    System.out.println(this.name3);
    RoiManager rm = RoiManager.getInstance();
    IJ.run("Duplicate...", this.name3);
    IJ.run("Set Measurements...", "area perimeter fit shape limit scientific redirect=None decimal=5");
    selectWindow(this.name3);
    IJ.run("Subtract Background...", "rolling=1.5");
    IJ.run("Enhance Contrast...", "saturated=25 equalize");
    IJ.run("Subtract Background...", "rolling=1.5");
    IJ.run("Convolve...",
            "text1=[-1 -3 -4 -3 -1\n-3 0 6 0 -3\n-4 6 50 6 -4\n-3 0 6 0 -3\n-1 -3 -4 -3 -1\n] normalize");
    IJ.run("8-bit", "");
    IJ.run("Restore Selection", "");
    IJ.run("Make Binary", "");
    Prefs.blackBackground = false;
    IJ.run("Convert to Mask", "");
    IJ.run("Restore Selection", "");
    this.valor1 = this.interval3.getText();
    this.valor2 = this.interval4.getText();
    System.out.println("VECTOR-> punctua: " + this.valor1 + " " + this.valor2);
    this.text = "size=" + this.valor1 + "-" + this.valor2
            + " pixel show=Outlines display include summarize add";
    IJ.run("Analyze Particles...", this.text);
    IJ.saveAs("tif", this.name3 + "_processed");
    String dest_filename1, dest_filename2, full;
    selectWindow("Results");
    //dest_filename1 = this.name2 + "_complete.txt";
    dest_filename2 = this.name3 + "_complete.csv";
    //IJ.saveAs("Results", prova + File.separator + dest_filename1);
    IJ.run("Input/Output...", "jpeg=85 gif=-1 file=.csv copy_row save_column save_row");
    //IJ.saveAs("Results", dir + File.separator + dest_filename2);
    IJ.saveAs("Results", this.name3 + "_complete.csv");
    IJ.run("Restore Selection");
    IJ.run("Clear Results");

    //txtarea.setText("Converting, please wait...  ");
    try {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(this.name3 + "_complete.csv"));
        Instances data = loader.getDataSet();
        System.out.println(data);

        // save ARFF
        String arffile = this.name3 + ".arff";
        System.out.println(arffile);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File(arffile));
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }
    //txtdata2.setText(this.name3);

    //txtarea.setText("Succesfully converted " + this.name3);
    //txtarea.setText("Analysing your data, please wait...  ");
    Instances data;
    try {
        data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff")));
        Instances newData = null;
        Add filter;
        newData = new Instances(data);
        filter = new Add();
        filter.setAttributeIndex("last");
        filter.setNominalLabels("rods,punctua,networks");
        filter.setAttributeName("target");
        filter.setInputFormat(newData);
        newData = Filter.useFilter(newData, filter);
        System.out.print(newData);
        Vector vec = new Vector();
        newData.setClassIndex(newData.numAttributes() - 1);
        // note: this compares newData's headers with itself, so it can never fail
        if (!newData.equalHeaders(newData)) {
            throw new IllegalArgumentException("Train and test are not compatible!");
        }

        URL urlToModel = this.getClass().getResource("/" + "Final.model");
        InputStream stream = urlToModel.openStream();

        Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream);
        System.out.println("PROVANT MODEL.classifyInstance");
        for (int i = 0; i < newData.numInstances(); i++) {
            double pred = cls.classifyInstance(newData.instance(i));
            double[] dist = cls.distributionForInstance(newData.instance(i));
            System.out.print((i + 1) + " - ");
            System.out.print(newData.classAttribute().value((int) pred) + " - ");
            //txtarea2.setText(Utils.arrayToString(dist));

            System.out.println(Utils.arrayToString(dist));

            vec.add(newData.classAttribute().value((int) pred));

        }
        int p = 0, n = 0, r = 0;

        //txtarea2.append(Utils.arrayToString(this.target));
        for (Object vec1 : vec) {
            if ("rods".equals(vec1.toString())) {
                r = r + 1;
            }
            if ("punctua".equals(vec1.toString())) {
                p = p + 1;
            }
            if ("networks".equals(vec1.toString())) {
                n = n + 1;
            }

            PrintWriter out = null;
            try {

                out = new PrintWriter(this.name3 + "_morphology.txt");
                out.println(vec);
                out.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            //System.out.println(vec.get(i));
        }
        System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n);
        IJ.showMessage(
                "Your file:" + this.name3 + "arff" + "\nhas been analysed, and it is composed by-> punctua: "
                        + p + ", rods: " + r + ", networks: " + n);
        this.txtarea2.setText(
                "Your file:" + this.name3 + ".arff" + "\nhas been analysed, and it is composed by-> punctua: "
                        + p + ", rods: " + r + ", networks: " + n);
        A_MachineLearning nf1 = new A_MachineLearning();
        A_MachineLearning.txtresults1.setText(this.txtarea2.getText());
        nf1.setVisible(true);

    } catch (IOException ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }

    IJ.run("Clear Results");
    //IJ.RoiManager("Delete");
    IJ.run("Clear Results");
    IJ.run("Close All", "");

    if (WindowManager.getFrame("Results") != null) {
        IJ.selectWindow("Results");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("Summary") != null) {
        IJ.selectWindow("Summary");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("Results") != null) {
        IJ.selectWindow("Results");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("ROI Manager") != null) {
        IJ.selectWindow("ROI Manager");
        IJ.run("Close");
    }

    IJ.run("Close All", "roiManager");
    IJ.run("Close All", "");
    setVisible(false);
    dispose();
}

From source file:ExperimentDemo.java

License:Open Source License

/**
 * Expects the following parameters:
 * <ul>
 *   <li>-classifier "classifier incl. parameters"</li>
 *   <li>-exptype "classification|regression"</li>
 *   <li>-splittype "crossvalidation|randomsplit"</li>
 *   <li>-runs "# of runs"</li>
 *   <li>-folds "# of cross-validation folds"</li>
 *   <li>-percentage "percentage for randomsplit"</li>
 *   <li>-result "arff file for storing the results"</li>
 *   <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 * 
 * @param args   the commandline arguments
 * @throws Exception   if something goes wrong
 */
public static void main(String[] args) throws Exception {
    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: weka.examples.experiment.ExperimentDemo\n"
                + "\t   -classifier <classifier incl. parameters>\n"
                + "\t   -exptype <classification|regression>\n"
                + "\t   -splittype <crossvalidation|randomsplit>\n" + "\t   -runs <# of runs>\n"
                + "\t   -folds <folds for CV>\n" + "\t   -percentage <percentage for randomsplit>\n"
                + "\t   -result <ARFF file for storing the results>\n"
                + "\t   -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. setup the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    /*
     * Interface to objects able to generate a fixed set of results for a particular split of a dataset.
     * The set of results should contain fields related to any settings of the SplitEvaluator (not
     * including the dataset name): for example, one field for the classifier used to get the results,
     * another for the classifier options, etc.
     * Possible implementations of SplitEvaluator: StdClassification results, StdRegression results.
     */
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        /*
         * A SplitEvaluator that produces results for a classification scheme on a nominal class attribute. 
         */
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        /*
         * For each run, carries out an n-fold cross-validation, using the set SplitEvaluator to
         * generate some results. If the class attribute is nominal, the dataset is stratified.
         * Results for each fold are generated, so you may wish to use this in conjunction with an
         * AveragingResultProducer to obtain averages for each run.
         */
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        /*
         * Stores information on a property of an object: the class of the object with the property,
         * the property descriptor, and the current value.
         */
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);

    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    /*
     * Calculates T-Test statistics on data stored in a set of instances. 
     */
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill result matrix (but discarding the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());
    // output results for each dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println("    Perc. correct: " + matrix.getMean(i, 0));
        System.out.println("    StdDev: " + matrix.getStdDev(i, 0));
    }
}
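
For reference, a hypothetical invocation matching the options parsed above (weka.jar is assumed to be on the classpath; the classifier, dataset, and result file names are placeholders):

java -cp weka.jar:. ExperimentDemo -classifier "weka.classifiers.trees.J48 -C 0.25" -exptype classification -splittype crossvalidation -runs 10 -folds 10 -result results.arff -t iris.arff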

From source file:PrincipalComponents.java

License:Open Source License

private void buildAttributeConstructor(Instances data) throws Exception {
    m_eigenvalues = null;
    m_outputNumAtts = -1;
    m_attributeFilter = null;
    m_nominalToBinFilter = null;
    m_sumOfEigenValues = 0.0;
    m_trainInstances = new Instances(data);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_trainHeader = new Instances(m_trainInstances, 0);

    m_replaceMissingFilter = new ReplaceMissingValues();
    m_replaceMissingFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter);

    /*
     * if (m_normalize) {
     *   m_normalizeFilter = new Normalize();
     *   m_normalizeFilter.setInputFormat(m_trainInstances);
     *   m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);
     * }
     */

    m_nominalToBinFilter = new NominalToBinary();
    m_nominalToBinFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter);

    // delete any attributes that have only one distinct value or whose values are all missing
    Vector<Integer> deleteCols = new Vector<Integer>();
    for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
        if (m_trainInstances.numDistinctValues(i) <= 1) {
            deleteCols.addElement(new Integer(i));
        }
    }

    if (m_trainInstances.classIndex() >= 0) {
        // get rid of the class column
        m_hasClass = true;
        m_classIndex = m_trainInstances.classIndex();
        deleteCols.addElement(new Integer(m_classIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
        m_attributeFilter = new Remove();
        int[] todelete = new int[deleteCols.size()];
        for (int i = 0; i < deleteCols.size(); i++) {
            todelete[i] = (deleteCols.elementAt(i)).intValue();
        }
        m_attributeFilter.setAttributeIndicesArray(todelete);
        m_attributeFilter.setInvertSelection(false);
        m_attributeFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_trainInstances);

    m_numInstances = m_trainInstances.numInstances();
    m_numAttribs = m_trainInstances.numAttributes();

    fillCovariance();

    SymmDenseEVD evd = SymmDenseEVD.factorize(m_correlation);

    m_eigenvectors = Matrices.getArray(evd.getEigenvectors());
    m_eigenvalues = evd.getEigenvalues();

    /*
     * for (int i = 0; i < m_numAttribs; i++) {
     *   for (int j = 0; j < m_numAttribs; j++) {
     *     System.err.println(v[i][j] + " ");
     *   }
     *   System.err.println(d[i]);
     * }
     */

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (int i = 0; i < m_eigenvalues.length; i++) {
        if (m_eigenvalues[i] < 0) {
            m_eigenvalues[i] = 0.0;
        }
    }
    m_sortedEigens = Utils.sort(m_eigenvalues);
    m_sumOfEigenValues = Utils.sum(m_eigenvalues);

    m_transformedFormat = setOutputFormat();
    if (m_transBackToOriginal) {
        m_originalSpaceFormat = setOutputFormatOriginal();

        // new ordered eigenvector matrix
        int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes()
                : m_transformedFormat.numAttributes() - 1;

        double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1];

        // try converting back to the original space
        for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
            for (int j = 0; j < m_numAttribs; j++) {
                orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]];
            }
        }

        // transpose the matrix
        int nr = orderedVectors.length;
        int nc = orderedVectors[0].length;
        m_eTranspose = new double[nc][nr];
        for (int i = 0; i < nc; i++) {
            for (int j = 0; j < nr; j++) {
                m_eTranspose[i][j] = orderedVectors[j][i];
            }
        }
    }
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Gets the transformed training data.
 *
 * @return the transformed training data
 * @throws Exception if transformed data can't be returned
 */
@Override
public Instances transformedData(Instances data) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("Principal components hasn't been built yet");
    }

    Instances output = null;

    if (m_transBackToOriginal) {
        output = new Instances(m_originalSpaceFormat);
    } else {
        output = new Instances(m_transformedFormat);
    }
    for (int i = 0; i < data.numInstances(); i++) {
        Instance converted = convertInstance(data.instance(i));
        output.add(converted);
    }

    return output;
}

From source file:BaggingImprove.java

/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the bagged
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // get a fresh copy of the data (removal of instances with a missing class is disabled here)
    data = new Instances(data);
    //data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    //+
    System.out.println("Classifier length" + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    //+
    System.out.println("Bag Size " + bagSize);

    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    //+
    // initialize the model output naming
    BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"));

    for (int j = 0; j < m_Classifiers.length; j++) {

        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];

            //System.out.println("Inbag1 " + inBag[0][1]);
            //bagData = resampleWithWeights(data, random, inBag[j]);
            bagData = data.resampleWithWeights(random, inBag[j]);
            //System.out.println("num after resample " + bagData.numInstances());
            //+
            //                for (int k = 0; k < bagData.numInstances(); k++) {
            //                    System.out.println("Bag Data after resample [calc out bag]" + bagData.instance(k));
            //                }

        } else {
            //+
            System.out.println("Not m_Calc out of bag");
            System.out.println("Please configure code inside!");

            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        if (m_Classifier instanceof Randomizable) {
            //+
            System.out.println("Randomizable");
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }

        //write bootstrap into file
        writer.write("Bootstrap " + j);
        writer.newLine();
        writer.write(bagData.toString());
        writer.newLine();

        System.out.println("Berhasil menyimpan bootstrap ke file ");

        System.out.println("Bootstrap " + j + 1);
        //            textarea.append("\nBootsrap " + (j + 1));
        //System.out.println("num instance kedua kali "+bagData.numInstances());

        for (int b = 1; b < bagData.numInstances(); b++) {
            System.out.println("" + bagData.instance(b));
            //                textarea.append("\n" + bagData.instance(b));
        }
        //            //+

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //            //+
        //            
        //            SerializationHelper serialization = new SerializationHelper();
        //            serialization.write("KnnData"+model+".model", m_Classifiers[j]);
        //            System.out.println("Finish write into model");
        //            model++;
    }

    writer.flush();
    writer.close();
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;
                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] = m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    //-
                    //                        for(double a : newProbs)
                    //                        {
                    //                            System.out.println("Double new probs %.f "+a);
                    //                        }
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }

                }
            }
            System.out.println("Vote count %d" + voteCount);

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
                //-
                System.out.println("Vote " + vote);

            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                //+
                System.out.println("Vote terakhir" + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:CrossValidationMultipleRuns.java

License:Open Source License

/**
 * Performs the cross-validation. See Javadoc of class for information
 * on command-line parameters.
 *
 * @param args   the command-line parameters
 * @throws Exception   if something goes wrong
 */
public static void main(String[] args) throws Exception {
    // load data and set the class index
    Instances data = DataSource.read(Utils.getOption("t", args));
    String clsIndex = Utils.getOption("c", args);
    if (clsIndex.length() == 0)
        clsIndex = "last";
    if (clsIndex.equals("first"))
        data.setClassIndex(0);
    else if (clsIndex.equals("last"))
        data.setClassIndex(data.numAttributes() - 1);
    else
        data.setClassIndex(Integer.parseInt(clsIndex) - 1);

    // classifier
    String[] tmpOptions;
    String classname;
    tmpOptions = Utils.splitOptions(Utils.getOption("W", args));
    classname = tmpOptions[0];
    tmpOptions[0] = "";
    Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions);

    // other options
    int runs = Integer.parseInt(Utils.getOption("r", args));
    int folds = Integer.parseInt(Utils.getOption("x", args));

    // perform cross-validation
    for (int i = 0; i < runs; i++) {
        // randomize data
        int seed = i + 1;
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        //if (randData.classAttribute().isNominal())
        //   randData.stratify(folds);

        Evaluation eval = new Evaluation(randData);

        StringBuilder optionsString = new StringBuilder();
        for (String s : cls.getOptions()) {
            optionsString.append(s);
            optionsString.append(" ");
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup run " + (i + 1) + " ===");
        System.out.println("Classifier: " + optionsString.toString());
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();

        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);

            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
            System.out.println(eval.toClassDetailsString());
        }

        System.out.println(
                eval.toSummaryString("=== " + folds + "-fold Cross-validation run " + (i + 1) + " ===", false));
    }
}
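
A hypothetical invocation for the options read above (-t dataset, -c class index, -W classifier, -r runs, -x folds; the file and classifier names are placeholders):

java -cp weka.jar:. CrossValidationMultipleRuns -t iris.arff -c last -W weka.classifiers.trees.J48 -r 10 -x 10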

From source file:REPTree.java

License:Open Source License

/**
 * Builds classifier.
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:A_MachineLearning.java

private void jButton7ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton7ActionPerformed
    Instances data;
    try {
        data = new Instances(new BufferedReader(new FileReader(this.file2 + ".arff")));
        Instances newData = null;
        Add filter;
        newData = new Instances(data);
        filter = new Add();
        filter.setAttributeIndex("last");
        filter.setNominalLabels("rods,punctua,networks");
        filter.setAttributeName("target");
        filter.setInputFormat(newData);
        newData = Filter.useFilter(newData, filter);
        System.out.print(newData);
        Vector vec = new Vector();
        newData.setClassIndex(newData.numAttributes() - 1);
        // note: this compares newData's headers with itself, so it can never fail
        if (!newData.equalHeaders(newData)) {
            throw new IllegalArgumentException("Train and test are not compatible!");
        }

        URL urlToModel = this.getClass().getResource("/" + "Final.model");
        InputStream stream = urlToModel.openStream();

        Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream);
        System.out.println("PROVANT MODEL.classifyInstance");
        for (int i = 0; i < newData.numInstances(); i++) {
            double pred = cls.classifyInstance(newData.instance(i));
            double[] dist = cls.distributionForInstance(newData.instance(i));
            System.out.print((i + 1) + " - ");
            System.out.print(newData.classAttribute().value((int) pred) + " - ");
            //txtarea2.setText(Utils.arrayToString(dist));

            System.out.println(Utils.arrayToString(dist));

            vec.add(newData.classAttribute().value((int) pred));

        }
        int p = 0, n = 0, r = 0;

        //txtarea2.append(Utils.arrayToString(this.target));
        for (Object vec1 : vec) {
            if ("rods".equals(vec1.toString())) {
                r = r + 1;
            }
            if ("punctua".equals(vec1.toString())) {
                p = p + 1;
            }
            if ("networks".equals(vec1.toString())) {
                n = n + 1;
            }

            PrintWriter out = null;
            try {

                out = new PrintWriter(this.file2 + "_morphology.txt");
                out.println(vec);
                out.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            //System.out.println(vec.get(i));
        }
        System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n);
        IJ.showMessage(
                "Your file:" + this.file2 + ".arff" + "\nhas been analysed, and it is composed by-> punctua: "
                        + p + ", rods: " + r + ", networks: " + n);
        txtresults1.setText(
                "Your file:" + this.file2 + ".arff" + "\nhas been analysed, and it is composed by: \npunctua: "
                        + p + ", rods: " + r + ", networks: " + n);

    } catch (IOException ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }

    IJ.showMessage("analysing complete ");
}

From source file:Pair.java

License:Open Source License

public void setOptions(String[] options) throws Exception {

    String sourceFileName = Utils.getOption('S', options);
    if (sourceFileName.length() == 0) {
        throw new Exception("A filename must be specified with" + " the -S option.");
    } else {
        setSourceFile(new File(sourceFileName));
    }

    doFraction = (Utils.getFlag('F', options));
    doBagging = (Utils.getFlag('B', options));
    doUpsource = (Utils.getFlag('U', options));
    useMedian = (Utils.getFlag('M', options));
    resample = (Utils.getFlag('R', options));
    doSampleSize = Utils.getFlag("SS", options);
    fixedBeta = Utils.getFlag("FB", options);

    String optionString = Utils.getOption("TT", options);
    testData = new Instances(new BufferedReader(new FileReader(optionString)));
    testData.setClassIndex(testData.numAttributes() - 1);

    String r = Utils.getOption("Ratio", options);
    if (!r.equals(""))
        sourceRatio = Double.parseDouble(r);

    super.setOptions(options);

    r = Utils.getOption("II", options);
    if (!r.equals(""))
        sourceIterations = Integer.parseInt(r);
    else
        sourceIterations = m_NumIterations;

}
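
For reference, a hypothetical option string matching the flags parsed in this setOptions (file names and values are placeholders):

-S source.arff -TT test.arff -Ratio 0.5 -II 10 -B -M

Here -S names the source data file, -TT the test data (whose last attribute becomes the class), -Ratio the source ratio, -II the number of source iterations, and the remaining flags (-F, -B, -U, -M, -R, -SS, -FB) toggle the corresponding boolean options.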