Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



public Instance instance(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Computes an information-gain score for every non-class attribute of the
 * given data and stores the scores in m_InfoGains (indexed by attribute;
 * the class attribute's slot is left at its default of 0).
 *
 * The data is first passed through a Discretize filter (when m_Binarize is
 * false) or a NumericToBinary filter (when m_Binarize is true). A weighted
 * contingency table of attribute value versus class value is then built
 * for each attribute, optionally redistributing the counts of missing
 * values (m_missing_merge), and the score is the entropy over the table's
 * columns minus the entropy conditioned on its rows.
 *
 * @param data the instances to evaluate; the local reference is replaced by
 *             the filtered copy
 * @throws Exception if the capabilities test, a filter, or any other called
 *                   routine throws
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    // Both values are read BEFORE filtering; the filtered data is assumed to
    // keep the same class index and instance count — TODO confirm.
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    // counts[k] is the table for attribute k: one row per attribute value
    // plus an extra last row (index numValues) used for a missing attribute
    // value, and one column per class plus an extra last column (index
    // numClasses) used for a missing class. The class attribute's own slot
    // stays null.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    // temp holds the total instance weight per class (last slot: class missing).
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Row 0 of every table starts with the full class totals. The counting
    // loop below moves weight out of row 0 into the row of each value it
    // visits, so values the loop never visits stay counted in row 0
    // (presumably the implicit zero entries of sparse instances — TODO confirm).
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    // Iterates over the values stored in each instance (numValues/index/
    // valueSparse), not over all attributes.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // value present, class missing -> "missing class" column
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // value missing, class present -> "missing value" row
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both missing -> bottom-right corner cell
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // value and class both present; when the value index is 0
                    // the add and subtract hit the same cell and cancel out
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                // Only the non-missing part of the table (first numValues rows,
                // first numClasses columns) contributes to the marginals.
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                // When there is no non-missing weight the table is left as is,
                // still including its extra "missing" row and column.
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    // (missing-value row spread in proportion to the row sums)
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    // (missing-class column spread in proportion to the column sums)
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    // (corner cell, both missing, spread in proportion to each cell)
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    // The replacement table has no "missing" row or column.
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}

From source file:ffnn.FFNNTubesAI.java

/**
 * Trains the network with per-instance back-propagation.
 *
 * The input is passed through filterNominalNumeric. For a two-class problem
 * a "dummy" attribute is appended so the last two attributes form a pair of
 * complementary 0/1 target columns. The number of hidden neurons is read
 * interactively from standard input. Training runs for at most 50000 passes
 * over the data. Roughly every two seconds the current model is evaluated
 * on the training data; if no improvement is seen for 30 seconds the
 * weights are rebuilt with a new random learning rate and hidden-layer
 * size, and training stops after 10 such restarts.
 *
 * @param i the training instances
 * @throws Exception if filtering, evaluation or model serialization fails
 */
@Override
public void buildClassifier(Instances i) throws Exception {
    Instance temp_instance = null;
    RealMatrix error_output;
    RealMatrix error_hidden;
    RealMatrix input_matrix;
    RealMatrix hidden_matrix;
    RealMatrix output_matrix;
    Instances temp_instances;
    int r = 0; // number of restarts performed so far
    // Deliberately not closed: closing the Scanner would close System.in.
    Scanner scan = new Scanner(System.in);

    output_layer = i.numDistinctValues(i.classIndex()); //3
    temp_instances = filterNominalNumeric(i);

    if (output_layer == 2) {
        Add filter = new Add();
        filter.setAttributeIndex("last");
        filter.setAttributeName("dummy");
        filter.setInputFormat(temp_instances);
        temp_instances = Filter.useFilter(temp_instances, filter);
        // Turn the last two attributes into complementary 0/1 target columns.
        for (int j = 0; j < temp_instances.numInstances(); j++) {
            if (temp_instances.instance(j).value(temp_instances.numAttributes() - 2) == 0) {
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 2, 1);
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 1, 0);
            } else {
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 2, 0);
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 1, 1);
            }
        }
    }

    input_layer = temp_instances.numAttributes() - output_layer; //4
    hidden_layer = 0;
    // Keep prompting until a positive hidden-layer size is entered.
    while (hidden_layer < 1) {
        System.out.print("Hidden layer : ");
        hidden_layer = scan.nextInt();
    }
    int init_hidden = hidden_layer;
    // Sized for the initial hidden layer; restarts only ever pick a smaller
    // hidden layer, so the matrix stays large enough.
    error_hidden = new BlockRealMatrix(1, hidden_layer);
    error_output = new BlockRealMatrix(1, output_layer);
    input_matrix = new BlockRealMatrix(1, input_layer + 1); // one extra column for the bias

    buildWeight(input_layer, hidden_layer, output_layer);

    long last_time = System.nanoTime();
    double last_error_rate = 1;
    double best_error_rate = 1;

    // Timestamp (ns) of the last observed improvement; kept as long so the
    // nanosecond arithmetic below stays exact.
    long last_update = System.nanoTime();

    // maximum number of training passes
    for (long itr = 0; itr < 50000; ++itr) {
        if (r == 10) {
            break;
        }
        long time = System.nanoTime();
        // Evaluate progress at most once every 2 seconds.
        if (time - last_time > 2000000000) {
            Evaluation eval = new Evaluation(i);
            eval.evaluateModel(this, i);

            double accry = eval.correct() / eval.numInstances();
            double error_rate = eval.errorRate();
            if (error_rate < last_error_rate) {
                last_update = System.nanoTime();
                // Persist the model only when it beats the best one seen so
                // far, and remember the new best (previously never updated).
                if (error_rate < best_error_rate) {
                    best_error_rate = error_rate;
                    SerializationHelper.write(accry + "-" + time + ".model", this);
                }
            }

            if (accry > 0) {
                last_error_rate = error_rate;
            }

            // 30 seconds without improvement: restart with new hyper-parameters
            if (time - last_update > 30000000000L) {
                last_update = System.nanoTime();
                learning_rate = random() * 0.05;
                hidden_layer = (int) (10 + floor(random() * 15));
                // Floating-point division: the previous integer division
                // (hidden_layer / 25) was always 0 for values 10..24, which
                // forced every restart down to a single hidden neuron.
                hidden_layer = (int) floor((hidden_layer / 25.0) * init_hidden);
                if (hidden_layer == 0) {
                    hidden_layer = 1;
                }
                itr = 0;
                System.out.println("RESTART " + learning_rate + " " + hidden_layer);
                buildWeight(input_layer, hidden_layer, output_layer);
                r++;
            }

            System.out.println(accry + " " + itr);
            last_time = time;
        }

        for (int j = 0; j < temp_instances.numInstances(); j++) {
            // forward pass
            temp_instance = temp_instances.instance(j);

            for (int k = 0; k < input_layer; k++) {
                input_matrix.setEntry(0, k, temp_instance.value(k));
            }
            input_matrix.setEntry(0, input_layer, 1.0); // bias

            hidden_matrix = input_matrix.multiply(weight1);
            for (int y = 0; y < hidden_layer; ++y) {
                hidden_matrix.setEntry(0, y, sig(hidden_matrix.getEntry(0, y)));
            }

            output_matrix = hidden_matrix.multiply(weight2).add(bias2);
            for (int y = 0; y < output_layer; ++y) {
                output_matrix.setEntry(0, y, sig(output_matrix.getEntry(0, y)));
            }

            // backward pass

            // error of the output layer: o * (1 - o) * (target - o)
            for (int k = 0; k < output_layer; k++) {
                double o = output_matrix.getEntry(0, k);
                double t = temp_instance.value(input_layer + k);
                double err = o * (1 - o) * (t - o);
                error_output.setEntry(0, k, err);
            }

            // weight update, hidden -> output
            for (int y = 0; y < hidden_layer; y++) {
                for (int x = 0; x < output_layer; ++x) {
                    double wold = weight2.getEntry(y, x);
                    double correction = learning_rate * error_output.getEntry(0, x)
                            * hidden_matrix.getEntry(0, y);
                    weight2.setEntry(y, x, wold + correction);
                }
            }

            for (int x = 0; x < output_layer; ++x) {
                double correction = learning_rate * error_output.getEntry(0, x); // bias input treated as 1
                bias2.setEntry(0, x, bias2.getEntry(0, x) + correction);
            }

            // error of the hidden layer (uses the already-updated weight2)
            for (int k = 0; k < hidden_layer; ++k) {
                double o = hidden_matrix.getEntry(0, k);
                double t = 0;
                for (int x = 0; x < output_layer; ++x) {
                    t += error_output.getEntry(0, x) * weight2.getEntry(k, x);
                }
                double err = o * (1 - o) * t;
                error_hidden.setEntry(0, k, err);
            }

            // weight update, input (+ bias row) -> hidden
            for (int y = 0; y < input_layer + 1; ++y) {
                for (int x = 0; x < hidden_layer; ++x) {
                    double wold = weight1.getEntry(y, x);
                    double correction = learning_rate * error_hidden.getEntry(0, x)
                            * input_matrix.getEntry(0, y);
                    weight1.setEntry(y, x, wold + correction);
                }
            }
        }
    }
}

From source file:ffnn.MultilayerPerceptron.java

License:Open Source License

/**
 * This function sets what the m_numeric flag to represent the passed class it
 * also performs the normalization of the attributes if applicable and sets up
 * the info to normalize the class. (note that regardless of the options it
 * will fill an array with the range and base, set to normalize all attributes
 * and the class to be between -1 and 1)
 * /*from www .  j a v  a 2  s  .c o m*/
 * @param inst the instances.
 * @return The modified instances. This needs to be done. If the attributes
 *         are normalized then deep copies will be made of all the instances
 *         which will need to be passed back out.
 */
private Instances setClassType(Instances inst) throws Exception {
    if (inst != null) {
        // x bounds
        m_attributeRanges = new double[inst.numAttributes()];
        m_attributeBases = new double[inst.numAttributes()];
        for (int noa = 0; noa < inst.numAttributes(); noa++) {
            double min = Double.POSITIVE_INFINITY;
            double max = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < inst.numInstances(); i++) {
                if (!inst.instance(i).isMissing(noa)) {
                    double value = inst.instance(i).value(noa);
                    if (value < min) {
                        min = value;
                    }
                    if (value > max) {
                        max = value;
                    }
                }
            }
            m_attributeRanges[noa] = (max - min) / 2;
            m_attributeBases[noa] = (max + min) / 2;
        }

        if (m_normalizeAttributes) {
            for (int i = 0; i < inst.numInstances(); i++) {
                Instance currentInstance = inst.instance(i);
                double[] instance = new double[inst.numAttributes()];
                for (int noa = 0; noa < inst.numAttributes(); noa++) {
                    if (noa != inst.classIndex()) {
                        if (m_attributeRanges[noa] != 0) {
                            instance[noa] = (currentInstance.value(noa) - m_attributeBases[noa])
                                    / m_attributeRanges[noa];
                        } else {
                            instance[noa] = currentInstance.value(noa) - m_attributeBases[noa];
                        }
                    } else {
                        instance[noa] = currentInstance.value(noa);
                    }
                }
                inst.set(i, new DenseInstance(currentInstance.weight(), instance));
            }
        }

        if (inst.classAttribute().isNumeric()) {
            m_numeric = true;
        } else {
            m_numeric = false;
        }
    }
    return inst;
}

From source file:FFNN.MultiplePerceptron.java

/**
 * Creates the network structure and caches the class value of every
 * training instance.
 *
 * @param itt       value stored in itteration
 * @param learn     value stored in learningRate
 * @param numHLayer number of hidden neurons; one additional bias neuron is
 *                  created on top of this
 * @param i         the instances whose attribute count, class count and
 *                  class values are read
 */
public MultiplePerceptron(int itt, double learn, int numHLayer, Instances i) {
    itteration = itt;
    learningRate = learn;
    numHiddenLayer = numHLayer;

    // Hidden layer: numHiddenLayer neurons plus one for the bias neuron,
    // each constructed with the number of attributes.
    listNodeHidden = new ArrayList<>();
    for (int h = 0; h < numHiddenLayer + 1; h++) {
        listNodeHidden.add(new Node(i.numAttributes()));
    }

    // Output layer: one neuron per class, each constructed with the size of
    // the hidden layer (bias neuron included).
    listNodeOutput = new ArrayList<>();
    for (int c = 0; c < i.numClasses(); c++) {
        listNodeOutput.add(new Node(listNodeHidden.size()));
    }

    target = new ArrayList<>();

    // Class value of each instance, in instance order.
    instancesToDouble = new double[i.numInstances()];
    for (int row = 0; row < i.numInstances(); row++) {
        double[] values = i.instance(row).toDoubleArray();
        instancesToDouble[row] = values[i.classIndex()];
    }
}

From source file:FFNN.MultiplePerceptron.java

/**
 * Trains the network: for each of the configured iterations, every instance
 * is fed forward through the hidden and output layers, after which
 * calculateError and updateBobot are invoked for that instance. When
 * training ends, the value, error and weights of every hidden and output
 * node are printed to standard output.
 *
 * @param i the training instances; all attributes except the last one are
 *          used as inputs
 */
@Override
public void buildClassifier(Instances i) {
    for (int epoch = 0; epoch < itteration; epoch++) {
        for (int row = 0; row < i.numInstances(); row++) {
            // Input vector: the bias input first, then the attribute values.
            ArrayList<Double> inputs = new ArrayList<>();
            inputs.add(1.0);
            for (int att = 0; att < i.numAttributes() - 1; att++) {
                inputs.add(i.get(row).value(att));
            }

            // Node 0 of the hidden layer is the bias node: its value is
            // pinned to 1.0 and it is never recomputed.
            listNodeHidden.get(0).setValue(1.0);
            ArrayList<Double> hiddenOutputs = new ArrayList<>();
            hiddenOutputs.add(1.0);

            // Forward pass through the remaining hidden nodes.
            for (int h = 1; h < listNodeHidden.size(); h++) {
                double activation = listNodeHidden.get(h).output(inputs);
                listNodeHidden.get(h).setValue(activation);
                hiddenOutputs.add(activation);
            }

            // Forward pass through the output layer.
            for (int o = 0; o < listNodeOutput.size(); o++) {
                double activation = listNodeOutput.get(o).output(hiddenOutputs);
                listNodeOutput.get(o).setValue(activation);
            }

            calculateError(row);

            updateBobot(i.instance(row));
        }
    }

    // Dump the final state of the hidden layer.
    for (int h = 0; h < listNodeHidden.size(); h++) {
        System.out.println("Hidden value " + listNodeHidden.get(h).getValue());
        System.out.println("Hidden error " + listNodeHidden.get(h).getError());
        for (int w = 0; w < listNodeHidden.get(h).getWeightSize(); w++) {
            System.out.println("Hidden weight" + listNodeHidden.get(h).getWeightFromList(w));
        }
    }
    System.out.println();

    // Dump the final state of the output layer.
    for (int o = 0; o < listNodeOutput.size(); o++) {
        System.out.println("Output value " + listNodeOutput.get(o).getValue());
        System.out.println("Output error " + listNodeOutput.get(o).getError());
        for (int w = 0; w < listNodeOutput.get(o).getWeightSize(); w++) {
            System.out.println("Output weight" + listNodeOutput.get(o).getWeightFromList(w));
        }
    }
}

From source file:FinalMineria.Reconocimiento.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods./*  w  w  w  .j  av a  2  s.c om*/
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException, Exception {

    String accion = request.getParameter("accion");
    BufferedReader br = null;
    String ruta = request.getServletContext().getRealPath("/Recursos");
    br = new BufferedReader(new FileReader(ruta + "/nombres.txt"));
    linea = br.readLine();
    br.close();
    if ("Detener".equals(accion)) {
        grabar.finish();
        try {
            Thread.sleep(4000);
        } catch (InterruptedException ex) {
            Logger.getLogger(GrabarAudio.class.getName()).log(Level.SEVERE, null, ex);
        }
        String comando = "cmd /c " + request.getServletContext().getRealPath("/Recursos/OpenSmile")
                + "\\SMILExtract_Release.exe -C " + request.getServletContext().getRealPath("/Recursos/config")
                + "\\IS12_speaker_trait.conf -I " + request.getServletContext().getRealPath("/Recursos/audios")
                + "\\prueba.wav -O " + request.getServletContext().getRealPath("/Recursos/arff")
                + "\\prueba.arff -classes {" + linea + "} -classlabel ?";
        Process proceso = Runtime.getRuntime().exec(comando);
        proceso.waitFor();
        Instances prueba, conocimiento;
        try (BufferedReader archivoBase = new BufferedReader(new FileReader(
                request.getServletContext().getRealPath("/Recursos/arff") + "\\baseDatos.arff"))) {
            conocimiento = new Instances(archivoBase);
        }
        try (BufferedReader archivoPrueba = new BufferedReader(
                new FileReader(request.getServletContext().getRealPath("/Recursos/arff") + "\\prueba.arff"))) {
            prueba = new Instances(archivoPrueba);
        }

        conocimiento.deleteStringAttributes();
        conocimiento.setClassIndex(981);
        prueba.deleteStringAttributes();
        prueba.setClassIndex(981);
        Classifier clasificadorModelo = (Classifier) new NaiveBayes();
        clasificadorModelo.buildClassifier(conocimiento);
        double valorP = clasificadorModelo.classifyInstance(prueba.instance(prueba.numInstances() - 1));
        String prediccion = prueba.classAttribute().value((int) valorP);
        System.out.println(prediccion);
        request.setAttribute("prediccion", prediccion);
        RequestDispatcher dispatcher = request.getRequestDispatcher("./Hablante.jsp");
        dispatcher.forward(request, response);
    } else if ("Grabar".equals(accion)) {
        ruta = request.getServletContext().getRealPath("/Recursos/audios");
        grabar = new Grabador(ruta + "\\" + "prueba");
        Thread stopper = new Thread(new Runnable() {
            public void run() {
                try {
                    Thread.sleep(tiempo);
                } catch (InterruptedException ex) {
                    ex.printStackTrace();
                }
                grabar.finish();
            }
        });

        stopper.start();

        // start recording
        grabar.start();
        response.sendRedirect("./grabar.jsp");
    }
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

/**
 * Feeds every instance of the given data set to the filter as a single
 * batch and gathers whatever the filter outputs afterwards.
 *
 * @param filtre    the filter to apply; its input format is set from
 *                  {@code instances}
 * @param instances the data to filter
 * @return the filter's output-format data set with all output instances added
 * @throws Exception if the filter throws
 */
private Instances appliquerFiltre(Filter filtre, Instances instances) throws Exception {
    filtre.setInputFormat(instances);

    final int total = instances.numInstances();
    for (int idx = 0; idx < total; idx++) {
        filtre.input(instances.instance(idx));
    }
    filtre.batchFinished();

    // Drain the filter's output queue until it reports no more instances.
    Instances resultat = filtre.getOutputFormat();
    for (Instance sortie = filtre.output(); sortie != null; sortie = filtre.output()) {
        resultat.add(sortie);
    }
    return resultat;
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

/**
 * Measures the percentage of correctly classified instances of an ARFF test
 * file.
 *
 * @param res             the classifier to test; if null, the file name is
 *                        printed and -1 is returned
 * @param fichierTestARFF path of the ARFF test file
 * @param filtre          filter applied to the test data before classifying
 * @return the percentage (0-100) of instances whose predicted class equals
 *         the actual class, relative to the number of instances loaded
 *         before filtering; -1 if {@code res} is null
 * @throws Exception if loading, filtering or classifying fails
 */
private double tester(Classifier res, String fichierTestARFF, Filter filtre) throws Exception {
    if (res == null) {
        System.out.println("===============>" + fichierTestARFF);
        return -1;
    }

    Instances jeuDeTest = new DataSource(fichierTestARFF).getDataSet();
    // The denominator is taken before the filter runs.
    double total = jeuDeTest.numInstances();
    jeuDeTest.setClassIndex(jeuDeTest.numAttributes() - 1);
    // Flagged as critical by the original author: the test data must go
    // through the filter before it is classified.
    jeuDeTest = appliquerFiltre(filtre, jeuDeTest);

    double correct = 0;
    int idx = 0;
    while (idx < jeuDeTest.numInstances()) {
        double predicted = res.classifyInstance(jeuDeTest.instance(idx));
        if (predicted == jeuDeTest.instance(idx).classValue()) {
            correct++;
        }
        idx++;
    }

    return correct / total * 100;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Evaluates a classifier with the options given in an array of
 * strings. <p>
 *
 * Valid options are: <p>
 *
 * -t name of training file <br>
 * Name of the file with the training data. (required) <p>
 *
 * -T name of test file <br>
 * Name of the file with the test data. If missing a cross-validation
 * is performed. <p>
 *
 * -c class index <br>
 * Index of the class attribute (1, 2, ...; default: last). <p>
 *
 * -x number of folds <br>
 * The number of folds for the cross-validation (default: 10). <p>
 *
 * -s random number seed <br>
 * Random number seed for the cross-validation (default: 1). <p>
 *
 * -m file with cost matrix <br>
 * The name of a file containing a cost matrix. <p>
 *
 * -l name of model input file <br>
 * Loads classifier from the given file. <p>
 *
 * -d name of model output file <br>
 * Saves classifier built from the training data into the given file. <p>
 *
 * -v <br>
 * Outputs no statistics for the training data. <p>
 *
 * -o <br>
 * Outputs statistics only, not the classifier. <p>
 *
 * -i <br>
 * Outputs detailed information-retrieval statistics per class. <p>
 *
 * -k <br>
 * Outputs information-theoretic statistics. <p>
 *
 * -p <br>
 * Outputs predictions for test instances (and nothing else). <p>
 *
 * -r <br>
 * Outputs cumulative margin distribution (and nothing else). <p>
 *
 * -g <br>
 * Only for classifiers that implement "Graphable." Outputs
 * the graph representation of the classifier (and nothing
 * else). <p>
 *
 * @param classifier machine learning classifier
 * @param options the array of string containing the options
 * @exception Exception if model could not be evaluated successfully
 * @return a string describing the results */
public static String[] evaluateModel(Classifier classifier, String trainFileName, String objectOutputFileName)
        throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    String testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName,
            attributeRangeString;
    boolean IRstatistics = false, noOutput = false, printClassifications = false, trainStatistics = true,
            printMargins = false, printComplexityStatistics = false, printGraph = false,
            classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    BufferedReader trainReader = null, testReader = null;
    ObjectInputStream objectInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

    try {

        String[] options = null;

        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.length() != 0) {
            classIndex = Integer.parseInt(classIndexString);
        }
        //  trainFileName = Utils.getOption('t', options);

        objectInputFileName = Utils.getOption('l', options);
        //   objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test " + "file given.");
            }
        } else if ((objectInputFileName.length() != 0)
                && ((!(classifier instanceof UpdateableClassifier)) || (testFileName.length() == 0))) {
            throw new Exception("Classifier not incremental, or no " + "test file provided: can't "
                    + "use both train and model file.");
        }
        try {
            if (trainFileName.length() != 0) {
                trainReader = new BufferedReader(new FileReader(trainFileName));
            }
            if (testFileName.length() != 0) {
                testReader = new BufferedReader(new FileReader(testFileName));
            }
            if (objectInputFileName.length() != 0) {
                InputStream is = new FileInputStream(objectInputFileName);
                if (objectInputFileName.endsWith(".gz")) {
                    is = new GZIPInputStream(is);
                }
                objectInputStream = new ObjectInputStream(is);
            }
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        if (testFileName.length() != 0) {
            template = test = new Instances(testReader, 1);
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                test.setClassIndex(test.numAttributes() - 1);
            }
            if (classIndex > test.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.length() != 0) {
            if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)) {
                train = new Instances(trainReader, 1);
            } else {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            if ((testFileName.length() != 0) && !test.equalHeaders(train)) {
                throw new IllegalArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
            //train = new Instances(train);
        }
        if (template == null) {
            throw new Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.length() != 0);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClassifications = true;
            if (!attributeRangeString.equals("0")) {
                attributesToOutput = new Range(attributeRangeString);
            }
        }

        // If a model file is given, we can't process
        // scheme-specific options
        if (objectInputFileName.length() != 0) {
            Utils.checkForRemainingOptions(options);
        } else {

            // Set options for classifier
            if (classifier instanceof OptionHandler) {
                /* for (int i = 0; i < options.length; i++) {
                if (options[i].length() != 0) {
                    if (schemeOptionsText == null) {
                        schemeOptionsText = new StringBuffer();
                    }
                    if (options[i].indexOf(' ') != -1) {
                        schemeOptionsText.append('"' + options[i] + "\" ");
                    } else {
                        schemeOptionsText.append(options[i] + " ");
                    }
                }
                 }
                 */
                ((OptionHandler) classifier).setOptions(options);
            }
        }
        Utils.checkForRemainingOptions(options);

    } catch (Exception e) {
        throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier));
    }

    // Setup up evaluation objects
    EvaluationInternal trainingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);
    EvaluationInternal testingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);

    if (objectInputFileName.length() != 0) {

        // Load classifier from file
        classifier = (Classifier) objectInputStream.readObject();
        objectInputStream.close();
    }

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0) && (costMatrix == null)
            && (trainFileName.length() != 0)) {

        // Build classifier incrementally
        trainingEvaluation.setPriors(train);
        testingEvaluation.setPriors(train);
        trainTimeStart = System.currentTimeMillis();
        if (objectInputFileName.length() == 0) {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader)) {

            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        trainReader.close();
    } else if (objectInputFileName.length() == 0) {

        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.setPriors(tempTrain);
        testingEvaluation.setPriors(tempTrain);
        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        OutputStream os = new FileOutputStream(objectOutputFileName);
        if (objectOutputFileName.endsWith(".gz")) {
            os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        objectOutputStream.flush();
        objectOutputStream.close();
    }

    /*   // If classifier is drawable output string describing graph
       if ((classifier instanceof Drawable)
    && (printGraph)) {
    return ((Drawable) classifier).graph();
       }
            
       // Output the classifier as equivalent source
       if ((classifier instanceof Sourcable)
    && (printSource)) {
    return wekaStaticWrapper((Sourcable) classifier, sourceClass);
       }
            
       // Output test instance predictions only
       if (printClassifications) {
    return printClassifications(classifier, new Instances(template, 0),
                                testFileName, classIndex, attributesToOutput);
       }
       */

    // Output model
    if (!(noOutput || printMargins)) {
        if (classifier instanceof OptionHandler) {
            if (schemeOptionsText != null) {
                text.append("\nOptions: " + schemeOptionsText);
                text.append("\n");
            }
        }
        text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
        text.append("\n=== Evaluation Cost Matrix ===\n\n").append(costMatrix.toString());
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.length() != 0)) {

        if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)
                && (costMatrix == null)) {

            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            trainReader = new BufferedReader(new FileReader(trainFileName));

            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            testTimeStart = System.currentTimeMillis();
            while (train.readInstance(trainReader)) {

                trainingEvaluation.evaluateModelOnce((Classifier) classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
            trainReader.close();
        } else {
            testTimeStart = System.currentTimeMillis();
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        }

        // Print the results of the training evaluation
        //  if (printMargins) {
        //      return trainingEvaluation.toCumulativeMarginDistributionString();
        //   } else {
        text.append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2)
                + " seconds");
        text.append("\nTime taken to test model on training data: "
                + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
        text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n",
                printComplexityStatistics));
        if (template.classAttribute().isNominal()) {
            if (classStatistics) {
                text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }
            text.append("\n\n" + trainingEvaluation.toMatrixString());
        }

        //  }
    }

    // Compute proper error estimates
    if (testFileName.length() != 0) {

        // Testing is on the supplied test data
        while (test.readInstance(testReader)) {

            testingEvaluation.evaluateModelOnce((Classifier) classifier, test.instance(0));
            test.delete(0);
        }
        testReader.close();

        text.append("\n\n"
                + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    } else if (trainFileName.length() != 0) {

        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                    printComplexityStatistics));
        } else {
            text.append("\n\n\n" + testingEvaluation
                    .toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().isNominal()) {
        if (classStatistics) {
            text.append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.append("\n\n" + testingEvaluation.toMatrixString());
    }

    String result = "\t" + Utils.doubleToString(trainingEvaluation.pctCorrect(), 12, 4) + " %";
    result += "       " + Utils.doubleToString(testingEvaluation.pctCorrect(), 12, 4) + " %";

    String[] returnString = { text.toString(), result };
    return returnString;
}

From source file: GClass.EvaluationInternal.java

License: Open Source License

/**
 * Runs the classifier over every instance of the supplied dataset, in
 * order, and collects the value reported for each one. The data must have
 * exactly the same format (e.g. order of attributes) as the data used to
 * train the classifier; otherwise the results will generally be
 * meaningless.
 *
 * @param classifier machine learning classifier
 * @param data set of test instances for evaluation
 * @return one entry per instance of {@code data}, holding whatever
 * {@code evaluateModelOnce} returned for that instance (presumably the
 * prediction)
 * @exception Exception if model could not be evaluated successfully
 */
public double[] evaluateModel(Classifier classifier, Instances data) throws Exception {

    double[] results = new double[data.numInstances()];

    int position = 0;
    while (position < data.numInstances()) {
        results[position] = evaluateModelOnce(classifier, data.instance(position));
        position++;
    }
    return results;
}