Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


public int classIndex() 

Source Link

Document

Returns the class attribute's index.

Usage

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels./*from   w  w w  .  j  av  a2  s.c  o m*/
 *
 * @param   j    index 1 (assume that <code>j &lt; k</code>)
 * @param   k   index 2 (assume that <code>j &lt; k</code>)
 * @param   D   iInstances, with attributes in labeled by original index
 * @return       Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int j, int k, int p) {
    int L = D.classIndex();

    HashMap<String, Integer> count = new HashMap<String, Integer>();

    Set<String> values = new HashSet<String>();
    for (int i = 0; i < D.numInstances(); i++) {
        String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k));
        String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k);
        //System.out.println("w = "+w);
        count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1);
        values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)));
    }
    //System.out.println("("+j+","+k+")"+values);
    System.out.print("pruned from " + count.size() + " to ");
    MLUtils.pruneCountHashMap(count, p);
    String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future
    System.out.println("" + count.size() + " with p = " + p);
    System.out.println("" + count);
    values = count.keySet();

    // Create and insert the new attribute
    D.insertAttributeAt(
            new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L);

    // Set values for the new attribute
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        String y_jk = encodeValue(x.stringValue(j), x.stringValue(k));
        try {
            x.setValue(L, y_jk); // y_jk = 
        } catch (Exception e) {
            //x.setMissing(L);
            //D.delete(i);
            //i--;
            String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG
            //System.out.println("OK, that value ("+y_jk+") didn't exist ... set the closests ones ...: "+Arrays.toString(y_close));
            int max_c = 0;
            for (String y_ : y_close) {
                int c = count.get(y_);
                if (c > max_c) {
                    max_c = c;
                    y_max = y_;
                }
            }
            //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ...");
            x.setValue(L, y_max);
            // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one)
        }
    }

    // Delete separate attributes
    D.deleteAttributeAt(k > j ? k : j);
    D.deleteAttributeAt(k > j ? j : k);

    // Set class index
    D.setClassIndex(L - 1);
    return D;
}

From source file:meka.gui.dataviewer.DataViewerMainPanel.java

License:Open Source License

/**
 * Displays some properties of the currently loaded instances in a
 * list-selector dialog.
 */
public void showProperties() {
    DataPanel panel = getCurrentPanel();
    if (panel == null) {
        return;
    }

    Instances inst = panel.getInstances();
    if (inst == null) {
        return;
    }

    // default the class attribute to the last one if none has been set yet
    if (inst.classIndex() < 0) {
        inst.setClassIndex(inst.numAttributes() - 1);
    }

    // assemble the properties to display
    Vector<String> props = new Vector<String>();
    props.add("Filename: " + panel.getFilename());
    props.add("Relation name: " + inst.relationName());
    props.add("# of instances: " + inst.numInstances());
    props.add("# of attributes: " + inst.numAttributes());
    props.add("Class attribute: " + inst.classAttribute().name());
    props.add("# of class labels: " + inst.numClasses());

    ListSelectorDialog dialog = new ListSelectorDialog(getParentFrame(), new JList(props));
    dialog.showDialog();
}

From source file:meka.gui.explorer.classify.PredictionsOnTestset.java

License:Open Source License

/**
 * Returns the action lister to use in the menu.
 *
 * @param history   the current history/*  ww  w.j a  v  a  2  s  .  c om*/
 * @param index     the selected history item
 * @return          the listener
 */
@Override
public ActionListener getActionListener(final ResultHistoryList history, final int index) {
    final MultiLabelClassifier classifier = (MultiLabelClassifier) getClassifier(history, index);
    final Instances header = getHeader(history, index);

    return new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            Runnable run = new Runnable() {
                @Override
                public void run() {
                    ClassifyTab owner = (ClassifyTab) getOwner();
                    Instances test;
                    owner.startBusy("Predictions on test...");
                    try {
                        MLUtils.prepareData(owner.getTestData());
                        test = new Instances(owner.getTestData());
                        test.setClassIndex(owner.getTestData().classIndex());
                        String msg = header.equalHeadersMsg(test);
                        if (msg != null)
                            throw new IllegalArgumentException(
                                    "Model's training set and current test set are not compatible:\n" + msg);
                        // collect predictions
                        Instances predicted = new Instances(test, 0);
                        for (int i = 0; i < test.numInstances(); i++) {
                            double pred[] = classifier.distributionForInstance(test.instance(i));
                            // Cut off any [no-longer-needed] probabalistic information from MT classifiers.
                            if (classifier instanceof MultiTargetClassifier)
                                pred = Arrays.copyOf(pred, test.classIndex());
                            Instance predInst = (Instance) test.instance(i).copy();
                            for (int j = 0; j < pred.length; j++)
                                predInst.setValue(j, pred[j]);
                            predicted.add(predInst);
                            if ((i + 1) % 100 == 0)
                                owner.showStatus(
                                        "Predictions on test (" + (i + 1) + "/" + test.numInstances() + ")...");
                        }
                        owner.finishBusy();
                        // display predictions
                        DataViewerDialog dialog = new DataViewerDialog(GUIHelper.getParentFrame(owner),
                                ModalityType.MODELESS);
                        dialog.setDefaultCloseOperation(DataViewerDialog.DISPOSE_ON_CLOSE);
                        dialog.setInstances(predicted);
                        dialog.setSize(800, 600);
                        dialog.setLocationRelativeTo(owner);
                        dialog.setVisible(true);
                    } catch (Exception e) {
                        owner.handleException("Predictions failed on test set:", e);
                        owner.finishBusy("Predictions failed: " + e);
                        JOptionPane.showMessageDialog(owner, "Predictions failed:\n" + e, "Error",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
            };
            ((ClassifyTab) getOwner()).start(run);
        }
    };
}

From source file:meka.gui.explorer.classify.ReevaluateModelOnTestset.java

License:Open Source License

/**
 * Returns the action listener to use in the menu.
 *
 * @param history   the current history
 * @param index     the selected history item
 * @return          the listener
 */
@Override
public ActionListener getActionListener(final ResultHistoryList history, final int index) {
    final MultiLabelClassifier classifier = (MultiLabelClassifier) getClassifier(history, index);
    final Instances header = getHeader(history, index);

    return new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            Runnable run = new Runnable() {
                @Override
                public void run() {
                    ClassifyTab owner = (ClassifyTab) getOwner();
                    Result result;
                    Instances test;
                    owner.startBusy("Reevaluate on test...");
                    try {
                        MLUtils.prepareData(owner.getTestData());
                        test = new Instances(owner.getTestData());
                        test.setClassIndex(owner.getTestData().classIndex());
                        // the model's training header and the test set must be compatible
                        String msg = header.equalHeadersMsg(test);
                        if (msg != null)
                            throw new IllegalArgumentException(
                                    "Model's training set and current test set are not compatible:\n" + msg);
                        owner.log(OptionUtils.toCommandLine(classifier));
                        owner.log("Testset: " + test.relationName());
                        owner.log("Class-index: " + test.classIndex());
                        result = Evaluation.evaluateModel(classifier, test, "0.0", owner.getVOP()); // TODO what threshold to use?
                        owner.addResultToHistory(result, new Object[] { classifier, new Instances(test, 0) },
                                classifier.getClass().getName().replace("meka.classifiers.", ""));
                        owner.finishBusy();
                    } catch (Exception ex) { // renamed from 'e' to avoid shadowing the ActionEvent parameter
                        owner.handleException("Reevaluation failed on test set:", ex);
                        owner.finishBusy("Reevaluation failed: " + ex);
                        JOptionPane.showMessageDialog(owner, "Reevaluation failed:\n" + ex, "Error",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
            };
            ((ClassifyTab) getOwner()).start(run);
        }
    };
}

From source file:meka.gui.explorer.ClassifyTab.java

License:Open Source License

/**
 * Starts the classification./*from w w w.  java2s. c  o  m*/
 */
protected void startClassification() {
    String type;
    Runnable run;
    final Instances data;

    if (m_ComboBoxExperiment.getSelectedIndex() == -1)
        return;

    data = new Instances(getData());
    if (m_Randomize)
        data.randomize(new Random(m_Seed));
    type = m_ComboBoxExperiment.getSelectedItem().toString();
    run = null;

    switch (type) {
    case TYPE_CROSSVALIDATION:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                startBusy("Cross-validating...");
                try {
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + data.relationName());
                    log("Class-index: " + data.classIndex());
                    result = Evaluation.cvModel(classifier, data, m_Folds, m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(data, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed:", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed (CV):\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    case TYPE_TRAINTESTSPLIT:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                int trainSize;
                Instances train;
                Instances test;
                startBusy("Train/test split...");
                try {
                    trainSize = (int) (data.numInstances() * m_SplitPercentage / 100.0);
                    train = new Instances(data, 0, trainSize);
                    test = new Instances(data, trainSize, data.numInstances() - trainSize);
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + train.relationName());
                    log("Class-index: " + train.classIndex());
                    result = Evaluation.evaluateModel(classifier, train, test, m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(train, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (train/test split):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    case TYPE_SUPPLIEDTESTSET:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                int trainSize;
                Instances train;
                Instances test;
                startBusy("Supplied test...");
                try {
                    train = new Instances(data);
                    MLUtils.prepareData(m_TestInstances);
                    test = new Instances(m_TestInstances);
                    test.setClassIndex(data.classIndex());
                    String msg = train.equalHeadersMsg(test);
                    if (msg != null)
                        throw new IllegalArgumentException("Train and test set are not compatible:\n" + msg);
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + train.relationName());
                    log("Class-index: " + train.classIndex());
                    result = Evaluation.evaluateModel(classifier, train, test, m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(train, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (train/test split):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    case TYPE_BINCREMENTAL:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                startBusy("Incremental...");
                try {
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + data.relationName());
                    log("Class-index: " + data.classIndex());
                    result = IncrementalEvaluation.evaluateModelBatchWindow(classifier, data, m_Samples, 1.,
                            m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(data, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (incremental splits):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    case TYPE_PREQUENTIAL:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                startBusy("Incremental...");
                try {
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + data.relationName());
                    log("Class-index: " + data.classIndex());
                    result = IncrementalEvaluation.evaluateModelPrequentialBasic(classifier, data,
                            (data.numInstances() / (m_Samples + 1)), 1., m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(data, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (incremental splits):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    default:
        throw new IllegalStateException("Unhandled evaluation type: " + type);
    }

    start(run);
}

From source file:milk.core.Exemplar.java

License:Open Source License

/**
 * Constructor creating an exemplar with the given dataset and the
 * given ID index.
 *
 * @param dataset the instances from which the header
 * information is to be taken
 * @param id the index of the ID of the exemplar
 * @throws Exception if the ID attribute is not nominal, or the ID value
 * is not shared by all instances in the dataset
 */
public Exemplar(Instances dataset, int id) throws Exception {
    m_IdIndex = id;
    m_ClassIndex = dataset.classIndex();
    m_Instances = new Instances(dataset);

    if (!m_Instances.attribute(m_IdIndex).isNominal())
        throw new Exception("The exemplar's ID is not nominal!");

    // take the ID/class values of the first instance as the reference
    double idvalue = (m_Instances.firstInstance()).value(m_IdIndex);
    double clsvalue = (m_Instances.firstInstance()).classValue();

    // Check the validity of this exemplar: every instance must carry the same ID value
    for (int i = 1; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if ((!Utils.eq(inst.value(m_IdIndex), idvalue))
        //|| (!Utils.eq(inst.classValue(), clsvalue))
        )
            throw new Exception("The Id value and/or class value is not unique!");
    }

    m_IdValue = idvalue;
    m_ClassValue = clsvalue;
}

From source file:milk.core.Exemplars.java

License:Open Source License

/**
 * Constructor using the given dataset and set ID index to
 * the given ID index.  Any instances with class value or ID
 * value missing will be dropped.
 *
 * @param dataset the instances from which the header
 * information is to be taken
 * @param idIndex the ID attribute's index
 * @exception Exception if the class index of the dataset
 * is not set(i.e. -1), the ID index is out of range or not nominal,
 * or the data is not a multi-instance data
 */
public Exemplars(Instances dataset, int idIndex) throws Exception {
    if (dataset.classIndex() == -1)
        throw new Exception(" Class Index negative (class not set yet)!");

    m_ClassIndex = dataset.classIndex();
    m_RelationName = dataset.relationName();
    int numAttr = dataset.numAttributes();
    m_Attributes = new Attribute[numAttr];
    for (int i = 0; i < numAttr; i++)
        m_Attributes[i] = dataset.attribute(i);

    // Validate the ID index BEFORE indexing into the attribute array.
    // (The original accessed m_Attributes[m_IdIndex] first and checked
    // 'm_IdIndex > numAttr', so an index equal to numAttr threw an
    // ArrayIndexOutOfBoundsException instead of the intended Exception.)
    m_IdIndex = idIndex;
    if ((m_IdIndex >= numAttr) || (m_IdIndex < 0))
        throw new Exception("ID index is wrong!");
    Attribute id = m_Attributes[m_IdIndex];
    if (!id.isNominal())
        throw new Exception("ID index is wrong!");

    m_Exemplars = new Vector(id.numValues());

    for (int j = 0; j < dataset.numInstances(); j++) {
        Instance ins = dataset.instance(j);
        add(ins);
    }
}

From source file:miRdup.WekaModule.java

License:Open Source License

/**
 * Trains an AdaBoost-on-RandomForest classifier on the given ARFF file,
 * cross-validates it 10-fold, writes the evaluation report to
 * "&lt;keyword&gt;&lt;modelExtension&gt;Output" and the ROC curve data to
 * "&lt;keyword&gt;&lt;modelExtension&gt;roc.arff", and serializes the trained
 * classifier to "&lt;keyword&gt;&lt;modelExtension&gt;".
 *
 * @param arff     the training data in ARFF format
 * @param keyword  prefix used for all output file names
 */
public static void trainModel(File arff, String keyword) {
    dec.setMaximumFractionDigits(3);
    System.out.println("\nTraining model on file " + arff);
    try {
        // load data; default the class attribute to the last one if unset
        DataSource source = new DataSource(arff.toString());
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        PrintWriter pwout = new PrintWriter(new FileWriter(keyword + Main.modelExtension + "Output"));
        PrintWriter pwroc = new PrintWriter(new FileWriter(keyword + Main.modelExtension + "roc.arff"));

        // remove the ID attribute (first column) before training
        Remove rm = new Remove();
        rm.setAttributeIndices("1");
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(rm);

        // train AdaBoost on Random Forest trees
        weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        model.setOptions(weka.core.Utils
                .splitOptions("-P 100 -S 1 -I 10 -W weka.classifiers.trees.RandomForest -- -I 50 -K 0 -S 1"));

        if (Main.debug) {
            System.out.print("Model options: " + model.getClass().getName().trim() + " ");
        }
        System.out.print(model.getClass() + " ");
        for (String s : model.getOptions()) {
            System.out.print(s + " ");
        }

        pwout.print("Model options: " + model.getClass().getName().trim() + " ");
        for (String s : model.getOptions()) {
            pwout.print(s + " ");
        }

        // build model (the Remove filter is applied inside the FilteredClassifier)
        fc.setClassifier(model);
        fc.buildClassifier(data);

        // cross validation 10 times on the model
        Evaluation eval = new Evaluation(data);
        StringBuffer sb = new StringBuffer();
        eval.crossValidateModel(fc, data, 10, new Random(1), sb, new Range("first,last"), false);

        pwout.println(sb);
        pwout.flush();

        // output
        pwout.println("\n" + eval.toSummaryString());
        System.out.println(eval.toSummaryString());

        pwout.println(eval.toClassDetailsString());
        System.out.println(eval.toClassDetailsString());

        // extract TP/FP/TN/FN/AUC from the textual class-details report
        // NOTE(review): this relies on the fixed line layout of
        // Evaluation.toClassDetailsString() -- verify against the Weka version in use.
        String ev[] = eval.toClassDetailsString().split("\n");

        String ptmp[] = ev[3].trim().split(" ");
        String ntmp[] = ev[4].trim().split(" ");
        String avgtmp[] = ev[5].trim().split(" ");

        ArrayList<String> p = new ArrayList<String>();
        ArrayList<String> n = new ArrayList<String>();
        ArrayList<String> avg = new ArrayList<String>();

        for (String s : ptmp) {
            if (!s.trim().isEmpty()) {
                p.add(s);
            }
        }
        for (String s : ntmp) {
            if (!s.trim().isEmpty()) {
                n.add(s);
            }
        }
        for (String s : avgtmp) {
            if (!s.trim().isEmpty()) {
                avg.add(s);
            }
        }

        double tp = Double.parseDouble(p.get(0));
        double fp = Double.parseDouble(p.get(1));
        double tn = Double.parseDouble(n.get(0));
        double fn = Double.parseDouble(n.get(1));
        double auc = Double.parseDouble(avg.get(7));

        pwout.println("\nTP=" + tp + "\nFP=" + fp + "\nTN=" + tn + "\nFN=" + fn);
        System.out.println("\nTP=" + tp + "\nFP=" + fp + "\nTN=" + tn + "\nFN=" + fn);

        // specificity, sensitivity, Matthews correlation, prediction accuracy
        double sp = ((tn) / (tn + fp));
        double se = ((tp) / (tp + fn));
        double acc = ((tp + tn) / (tp + tn + fp + fn));
        // MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
        // (fixed: the original computed sqrt((tp+fp)*(tn+fn)*(tp+fn)*tn + fp),
        // missing the parentheses around the last two factors)
        double mcc = ((tp * tn) - (fp * fn)) / Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));

        String output = "\nse=" + dec.format(se).replace(",", ".") + "\nsp=" + dec.format(sp).replace(",", ".")
                + "\nACC=" + dec.format(acc).replace(",", ".") + "\nMCC=" + dec.format(mcc).replace(",", ".")
                + "\nAUC=" + dec.format(auc).replace(",", ".");

        pwout.println(output);
        System.out.println(output);

        pwout.println(eval.toMatrixString());
        System.out.println(eval.toMatrixString());

        pwout.flush();
        pwout.close();

        // save the trained (filtered) classifier
        System.out.println("Model saved: " + keyword + Main.modelExtension);
        weka.core.SerializationHelper.write(keyword + Main.modelExtension, fc.getClassifier() /*model*/);

        // write the ROC curve data for class index 0
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);
        pwroc.print(result.toString());
        pwroc.flush();
        pwroc.close();

        // draw curve
        //rocCurve(eval);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:miRdup.WekaModule.java

License:Open Source License

/**
 * Tests a serialized model on a test ARFF file and writes per-entry
 * prediction reports (full, tabular and summary) next to the predictions
 * file.  Optionally asks miRdupPredictor for a predicted miRNA position
 * for entries the model rejects.
 *
 * @param testarff         the test data in ARFF format
 * @param predictionsFile  base name of the predictions file; a ".folded"
 *                         variant is preferred when it exists
 * @param classifier       path to the serialized Weka classifier
 * @param predictMiRNA     whether to predict a miRNA for rejected entries
 */
public static void testModel(File testarff, String predictionsFile, String classifier, boolean predictMiRNA) {
    System.out.println("Testing model on " + predictionsFile + " adapted in " + testarff
            + ". Submitted to model " + classifier);

    try {
        //add predictions sequences to object
        ArrayList<MirnaObject> alobj = new ArrayList<MirnaObject>();
        BufferedReader br = null;
        try {
            // prefer the pre-folded variant of the predictions file when present
            br = new BufferedReader(new FileReader(predictionsFile + ".folded"));
        } catch (FileNotFoundException fileNotFoundException) {
            br = new BufferedReader(new FileReader(predictionsFile));
        }
        BufferedReader br2 = new BufferedReader(new FileReader(testarff));
        // skip the ARFF header up to (and including) the "@data" marker
        String line2 = br2.readLine();
        while (!line2.startsWith("@data")) {
            line2 = br2.readLine();
        }
        String line = " ";
        int cpt = 0;
        // read the predictions file and the ARFF data section in lockstep;
        // each tab-separated line is: id, mature sequence, precursor, structure
        while (br.ready()) {
            line = br.readLine();
            line2 = br2.readLine();
            String[] tab = line.split("\t");
            MirnaObject m = new MirnaObject();
            m.setArff(line2);
            m.setId(cpt++);
            m.setIdName(tab[0]);
            m.setMatureSequence(tab[1]);
            m.setPrecursorSequence(tab[2]);
            m.setStructure(tab[3]);
            alobj.add(m);
        }
        br.close();
        br2.close();

        // load data; default the class attribute to the last one if unset
        DataSource source = new DataSource(testarff.toString());
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        //remove ID row
        data.deleteAttributeAt(0);
        //load model
        Classifier model = (Classifier) weka.core.SerializationHelper.read(classifier);

        // evaluate dataset on the model
        Evaluation eval = new Evaluation(data);

        eval.evaluateModel(model, data);

        FastVector fv = eval.predictions();

        // output
        PrintWriter pw = new PrintWriter(new FileWriter(predictionsFile + "." + classifier + ".miRdup.txt"));
        PrintWriter pwt = new PrintWriter(
                new FileWriter(predictionsFile + "." + classifier + ".miRdup.tab.txt"));
        PrintWriter pwout = new PrintWriter(
                new FileWriter(predictionsFile + "." + classifier + ".miRdupOutput.txt"));

        // parse each prediction's toString() ("<idx> <actual> <predicted> <flag> <prob>...")
        // NOTE(review): relies on the textual format of Weka prediction objects --
        // verify against the Weka version in use
        for (int i = 0; i < fv.size(); i++) {
            //System.out.println(fv.elementAt(i).toString());
            String[] tab = fv.elementAt(i).toString().split(" ");
            int actual = Integer.valueOf(tab[1].substring(0, 1));
            int predicted = Integer.valueOf(tab[2].substring(0, 1));
            double score = 0.0;
            boolean validated = false;
            if (actual == predicted) { //case validated
                int s = tab[4].length(); // unused; leftover from the commented substring parsing below
                try {
                    score = Double.valueOf(tab[4]);
                    //score = Double.valueOf(tab[4].substring(0, s - 1));
                } catch (NumberFormatException numberFormatException) {
                    score = 0.0; // non-numeric probability column
                }

                validated = true;
            } else {// case not validated
                int s = tab[5].length(); // unused; leftover from the commented substring parsing below
                try {
                    score = Double.valueOf(tab[5]);
                    //score = Double.valueOf(tab[5].substring(0, s - 1));
                } catch (NumberFormatException numberFormatException) {
                    score = 0.0; // non-numeric probability column
                }
                validated = false;
            }
            MirnaObject m = alobj.get(i);
            m.setActual(actual);
            m.setPredicted(predicted);
            m.setScore(score);
            m.setValidated(validated);
            m.setNeedPrediction(predictMiRNA);
            String predictionMiRNA = "";
            // for rejected entries, optionally ask the predictor for a miRNA position
            if (predictMiRNA && validated == false) {
                predictionMiRNA = miRdupPredictor.Predictor.predictionBySequence(m.getPrecursorSequence(),
                        classifier, classifier + ".miRdupPrediction.txt");
                try {
                    m.setPredictedmiRNA(predictionMiRNA.split(",")[0]);
                    m.setPredictedmiRNAstar(predictionMiRNA.split(",")[1]);
                } catch (Exception e) {
                    // no comma-separated pair returned; use the raw prediction for both
                    m.setPredictedmiRNA(predictionMiRNA);
                    m.setPredictedmiRNAstar(predictionMiRNA);
                }
            }

            pw.println(m.toStringFullPredictions());
            pwt.println(m.toStringPredictions());
            // flush periodically so long runs produce partial output
            if (i % 100 == 0) {
                pw.flush();
                pwt.flush();
            }
        }

        // keep only selected lines of the summary for the condensed report
        //System.out.println(eval.toSummaryString("\nSummary results of predictions\n======\n", false));
        String[] out = eval.toSummaryString("\nSummary results of predictions\n======\n", false).split("\n");
        String info = out[0] + "\n" + out[1] + "\n" + out[2] + "\n" + out[4] + "\n" + out[5] + "\n" + out[6]
                + "\n" + out[7] + "\n" + out[11] + "\n";
        System.out.println(info);
        //System.out.println("Predicted position of the miRNA by miRdup:"+predictionMiRNA);
        pwout.println(
                "File " + predictionsFile + " adapted in " + testarff + " submitted to model " + classifier);
        pwout.println(info);

        pw.flush();
        pw.close();
        pwt.flush();
        pwt.close();
        pwout.flush();
        pwout.close();

        System.out.println("Results in " + predictionsFile + "." + classifier + ".miRdup.txt");

        // draw curve
        //rocCurve(eval);
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:miRdup.WekaModule.java

License:Open Source License

/**
 * Evaluates the given serialized classifier on a test ARFF file and
 * returns the prediction accuracy (formatted with {@code dec}, printed
 * to stdout).
 *
 * @param testarff    the test data in ARFF format
 * @param classifier  path to the serialized Weka classifier
 * @return            the formatted accuracy, or "" if evaluation failed
 */
public static String testModel(File testarff, String classifier) {
    try {
        // load data; default the class attribute to the last one if unset
        DataSource source = new DataSource(testarff.toString());
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        //load model
        Classifier model = (Classifier) weka.core.SerializationHelper.read(classifier);

        // evaluate dataset on the model
        Evaluation eval = new Evaluation(data);
        eval.evaluateModel(model, data);

        // extract TP/FP/TN/FN from the textual class-details report
        // NOTE(review): relies on the fixed column layout of
        // Evaluation.toClassDetailsString() -- verify against the Weka version in use
        String ev[] = eval.toClassDetailsString().split("\n");

        String p = ev[3].trim();
        String n = ev[4].trim();

        double tp = Double.parseDouble(p.substring(0, 6).trim());
        double fp = 0;
        try {
            fp = Double.parseDouble(p.substring(11, 16).trim());
        } catch (Exception exception) {
            fp = Double.parseDouble(p.substring(7, 16).trim());
        }
        double tn = Double.parseDouble(n.substring(0, 6).trim());
        double fn = 0;
        try {
            fn = Double.parseDouble(n.substring(11, 16).trim());
        } catch (Exception exception) {
            fn = Double.parseDouble(n.substring(7, 16).trim());
        }

        // specificity, sensitivity, Matthews correlation, prediction accuracy
        double sp = ((tn) / (tn + fp));
        double se = ((tp) / (tp + fn));
        double acc = ((tp + tn) / (tp + tn + fp + fn));
        // MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
        // (fixed: the original computed sqrt((tp+fp)*(tn+fn)*(tp+fn)*tn + fp),
        // missing the parentheses around the last two factors)
        double mcc = ((tp * tn) - (fp * fn)) / Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));

        String out = dec.format(acc).replace(",", ".");
        System.out.println(out);
        return out;
    } catch (Exception e) {
        e.printStackTrace();
        return "";
    }

}