Example usage for weka.core Instances setClassIndex

List of usage examples for weka.core Instances setClassIndex

Introduction

On this page you can find usage examples for weka.core Instances setClassIndex.

Prototype

public void setClassIndex(int classIndex) 

Document

Sets the class index of the set.
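
Before the real-world examples below, here is a minimal, self-contained sketch of the typical call pattern. It assumes an ARFF file whose class is the last attribute; the path and the class name SetClassIndexSketch are hypothetical.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;

public class SetClassIndexSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; replace with a real ARFF file.
        Instances data = new Instances(new BufferedReader(new FileReader("/path/to/dataset.arff")));
        // Data read from an ARFF reader has no class attribute assigned (classIndex() == -1),
        // so set it explicitly; by convention the class is usually the last attribute.
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}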

Usage

From source file:focusedCrawler.target.TargetClassifierImpl.java

License:Open Source License

public static TargetClassifier loadClassifier(String cfg) throws IOException, ClassNotFoundException {
    ParameterFile config = new ParameterFile(cfg);
    StopList stoplist = new StopListArquivo(config.getParam("STOPLIST_FILES"));
    InputStream is = new FileInputStream(config.getParam("FILE_CLASSIFIER"));
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    Classifier classifier = (Classifier) objectInputStream.readObject();
    String[] attributes = config.getParam("ATTRIBUTES", " ");
    System.out.println(attributes.length);
    weka.core.FastVector vectorAtt = new weka.core.FastVector();
    for (int i = 0; i < attributes.length; i++) {
        vectorAtt.addElement(new weka.core.Attribute(attributes[i]));
    }
    String[] classValues = config.getParam("CLASS_VALUES", " ");
    weka.core.FastVector classAtt = new weka.core.FastVector();
    for (int i = 0; i < classValues.length; i++) {
        classAtt.addElement(classValues[i]);
    }
    vectorAtt.addElement(new weka.core.Attribute("class", classAtt));
    Instances insts = new Instances("target_classification", vectorAtt, 1);
    insts.setClassIndex(attributes.length);
    return new TargetClassifierImpl(classifier, insts, attributes, stoplist);
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

private Instances loadInstances(String partialFilename) throws IOException {
    final String filename = repertoireFichiersARFF + partialFilename + ".arff";

    // This may be overkill, but since Weka doesn't specify what charset
    // it will use, make sure we read the file as UTF-8.
    ArffLoader loader = new ArffLoader() {
        {
            m_sourceReader = new InputStreamReader(new FileInputStream(filename), "UTF-8");
        }
    };
    Instances i = loader.getStructure();
    i.setClassIndex(i.numAttributes() - 1);
    return i;
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

private void entrainerClassifieurSimpleOuDoublePhoneme(String repertoireFichiersARFF) throws Exception {
    Instances instances;

    DataSource source = new DataSource(
            repertoireFichiersARFF + Configuration.NOM_FICHIER_ARFF_SIMPLE_OU_DOUBLE_PHONEME + ".arff");
    instances = source.getDataSet();

    // Define the output class (the last attribute)
    instances.setClassIndex(instances.numAttributes() - 1);

    // Keep only certain attributes
    instances = appliquerFiltre(filtreSimpleOuDoublePhoneme, instances);

    // Start the training
    classifieurSimpleOuDoublePhoneme = new J48();
    classifieurSimpleOuDoublePhoneme.buildClassifier(instances);
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

private void entrainerClassifieurDoublePhoneme1er(String repertoireFichiersARFF) throws Exception {
    Instances instances;

    DataSource source = new DataSource(
            repertoireFichiersARFF + Configuration.NOM_FICHIER_ARFF_1er_DOUBLE_PHONEME + ".arff");
    instances = source.getDataSet();

    // Define the output class (the last attribute)
    instances.setClassIndex(instances.numAttributes() - 1);

    // Keep only certain attributes
    instances = appliquerFiltre(filtreDoublePhoneme1er, instances);

    // Start the training
    classifieurDoublePhoneme1er = new J48();
    classifieurDoublePhoneme1er.buildClassifier(instances);

}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

private void entrainerClassifieurDoublePhoneme2eme(String repertoireFichiersARFF) throws Exception {
    Instances instances;

    DataSource source = new DataSource(
            repertoireFichiersARFF + Configuration.NOM_FICHIER_ARFF_2eme_DOUBLE_PHONEME + ".arff");
    instances = source.getDataSet();

    // Define the output class (the last attribute)
    instances.setClassIndex(instances.numAttributes() - 1);

    // Keep only certain attributes
    instances = appliquerFiltre(filtreDoublePhoneme2eme, instances);

    // Start the training
    classifieurDoublePhoneme2eme = new J48();
    classifieurDoublePhoneme2eme.buildClassifier(instances);
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

private void entrainerClassifieurSimplesPhonemes(String repertoireFichiersARFF) throws Exception {
    Instances instances;
    DataSource source = null;

    tClassifieurSimplePhoneme = new J48[lexique.getNbGraphemes()];

    for (int i = 0; i < lexique.getNbGraphemes(); i++) {
        String graphemeCourant = lexique.getGraphemeFromIndice(i);

        try {
            source = new DataSource(repertoireFichiersARFF + Configuration.NOM_FICHIER_ARFF_SIMPLE_PHONEME + "_"
                    + graphemeCourant + ".arff");
        } catch (Exception e) {
            // File not found
            System.out.println("File not found: " + repertoireFichiersARFF
                    + Configuration.NOM_FICHIER_ARFF_SIMPLE_PHONEME + "_" + graphemeCourant + ".arff");
            source = null;
        }

        if (source != null) {
            System.out.println(
                    "        * " + graphemeCourant + " (" + (i + 1) + "/" + lexique.getNbGraphemes() + ")");
            instances = source.getDataSet();

            // Define the output class (the last attribute)
            instances.setClassIndex(instances.numAttributes() - 1);

            // Keep only certain attributes
            instances = appliquerFiltre(filtreSimplePhoneme, instances);

            // Start the training
            tClassifieurSimplePhoneme[i] = new J48();
            tClassifieurSimplePhoneme[i].buildClassifier(instances);
            System.gc();
        }

    }
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

private double tester(Classifier res, String fichierTestARFF, Filter filtre) throws Exception {
    double nbOk = 0;
    double nbTotal = 0;

    if (res == null) {
        System.out.println("===============>" + fichierTestARFF);
        return -1;
    }

    DataSource source = new DataSource(fichierTestARFF);
    Instances instances = source.getDataSet();
    nbTotal = instances.numInstances();
    instances.setClassIndex(instances.numAttributes() - 1);
    instances = appliquerFiltre(filtre, instances); // applying the filter here is essential
    for (int i = 0; i < instances.numInstances(); i++) {
        double numeroClass = res.classifyInstance(instances.instance(i));
        if (numeroClass == instances.instance(i).classValue()) {
            nbOk++;
        }

    }

    return nbOk / nbTotal * 100;
}

From source file:function.FileModel.java

public static void SaveModel(String sourcepath, String outputpath) throws IOException, Exception {
    // create J48

    // maybe replace this later with the actual classifier class
    Classifier cls = new J48();

    // train
    Instances inst = new Instances(new BufferedReader(new FileReader(sourcepath)));
    inst.setClassIndex(inst.numAttributes() - 1);
    cls.buildClassifier(inst);

    // serialize model
    weka.core.SerializationHelper.write(outputpath, cls);
}
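
As a complement to SaveModel above, here is a hedged sketch of reading the serialized model back with weka.core.SerializationHelper and classifying new instances; the file paths and the class name LoadModelSketch are hypothetical. Note that the class index is not persisted in the ARFF header, so setClassIndex must be called again on the freshly loaded dataset before classifyInstance().

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.SerializationHelper;

public class LoadModelSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical paths for the serialized model and the data to classify.
        Classifier cls = (Classifier) SerializationHelper.read("/path/to/model.model");
        Instances data = new Instances(new BufferedReader(new FileReader("/path/to/unlabeled.arff")));
        // Set the class attribute again on the newly loaded dataset.
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            double predicted = cls.classifyInstance(data.instance(i));
            System.out.println(data.classAttribute().value((int) predicted));
        }
    }
}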

From source file:function.LoadData.java

public static Instances getData(String filepath) throws Exception {
    DataSource source = new DataSource(filepath);
    Instances data = source.getDataSet();
    // setting class attribute if the data format does not provide this information
    // For example, the XRFF format saves the class attribute information as well
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    return data;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Evaluates a classifier with the options given in an array of
 * strings. <p>
 *
 * Valid options are: <p>
 *
 * -t name of training file <br>
 * Name of the file with the training data. (required) <p>
 *
 * -T name of test file <br>
 * Name of the file with the test data. If missing a cross-validation
 * is performed. <p>
 *
 * -c class index <br>
 * Index of the class attribute (1, 2, ...; default: last). <p>
 *
 * -x number of folds <br>
 * The number of folds for the cross-validation (default: 10). <p>
 *
 * -s random number seed <br>
 * Random number seed for the cross-validation (default: 1). <p>
 *
 * -m file with cost matrix <br>
 * The name of a file containing a cost matrix. <p>
 *
 * -l name of model input file <br>
 * Loads classifier from the given file. <p>
 *
 * -d name of model output file <br>
 * Saves classifier built from the training data into the given file. <p>
 *
 * -v <br>
 * Outputs no statistics for the training data. <p>
 *
 * -o <br>
 * Outputs statistics only, not the classifier. <p>
 *
 * -i <br>
 * Outputs detailed information-retrieval statistics per class. <p>
 *
 * -k <br>
 * Outputs information-theoretic statistics. <p>
 *
 * -p <br>
 * Outputs predictions for test instances (and nothing else). <p>
 *
 * -r <br>
 * Outputs cumulative margin distribution (and nothing else). <p>
 *
 * -g <br>
 * Only for classifiers that implement "Graphable." Outputs
 * the graph representation of the classifier (and nothing
 * else). <p>
 *
 * @param classifier machine learning classifier
 * @param trainFileName the name of the file with the training data
 * @param objectOutputFileName the name of the file to save the built classifier to
 * @exception Exception if model could not be evaluated successfully
 * @return an array holding the full result text and a summary of the percent-correct figures */
public static String[] evaluateModel(Classifier classifier, String trainFileName, String objectOutputFileName)
        throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    String testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName,
            attributeRangeString;
    boolean IRstatistics = false, noOutput = false, printClassifications = false, trainStatistics = true,
            printMargins = false, printComplexityStatistics = false, printGraph = false,
            classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    BufferedReader trainReader = null, testReader = null;
    ObjectInputStream objectInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

    try {

        String[] options = new String[0]; // empty array rather than null so Utils.getOption/getFlag below do not throw

        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.length() != 0) {
            classIndex = Integer.parseInt(classIndexString);
        }
        //  trainFileName = Utils.getOption('t', options);

        objectInputFileName = Utils.getOption('l', options);
        //   objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test " + "file given.");
            }
        } else if ((objectInputFileName.length() != 0)
                && ((!(classifier instanceof UpdateableClassifier)) || (testFileName.length() == 0))) {
            throw new Exception("Classifier not incremental, or no " + "test file provided: can't "
                    + "use both train and model file.");
        }
        try {
            if (trainFileName.length() != 0) {
                trainReader = new BufferedReader(new FileReader(trainFileName));
            }
            if (testFileName.length() != 0) {
                testReader = new BufferedReader(new FileReader(testFileName));
            }
            if (objectInputFileName.length() != 0) {
                InputStream is = new FileInputStream(objectInputFileName);
                if (objectInputFileName.endsWith(".gz")) {
                    is = new GZIPInputStream(is);
                }
                objectInputStream = new ObjectInputStream(is);
            }
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        if (testFileName.length() != 0) {
            template = test = new Instances(testReader, 1);
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                test.setClassIndex(test.numAttributes() - 1);
            }
            if (classIndex > test.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.length() != 0) {
            if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)) {
                train = new Instances(trainReader, 1);
            } else {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            if ((testFileName.length() != 0) && !test.equalHeaders(train)) {
                throw new IllegalArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
            //train = new Instances(train);
        }
        if (template == null) {
            throw new Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.length() != 0);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClassifications = true;
            if (!attributeRangeString.equals("0")) {
                attributesToOutput = new Range(attributeRangeString);
            }
        }

        // If a model file is given, we can't process
        // scheme-specific options
        if (objectInputFileName.length() != 0) {
            Utils.checkForRemainingOptions(options);
        } else {

            // Set options for classifier
            if (classifier instanceof OptionHandler) {
                /* for (int i = 0; i < options.length; i++) {
                if (options[i].length() != 0) {
                    if (schemeOptionsText == null) {
                        schemeOptionsText = new StringBuffer();
                    }
                    if (options[i].indexOf(' ') != -1) {
                        schemeOptionsText.append('"' + options[i] + "\" ");
                    } else {
                        schemeOptionsText.append(options[i] + " ");
                    }
                }
                 }
                 */
                ((OptionHandler) classifier).setOptions(options);
            }
        }
        Utils.checkForRemainingOptions(options);

    } catch (Exception e) {
        throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier));
    }

    // Setup up evaluation objects
    EvaluationInternal trainingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);
    EvaluationInternal testingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);

    if (objectInputFileName.length() != 0) {

        // Load classifier from file
        classifier = (Classifier) objectInputStream.readObject();
        objectInputStream.close();
    }

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0) && (costMatrix == null)
            && (trainFileName.length() != 0)) {

        // Build classifier incrementally
        trainingEvaluation.setPriors(train);
        testingEvaluation.setPriors(train);
        trainTimeStart = System.currentTimeMillis();
        if (objectInputFileName.length() == 0) {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader)) {

            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        trainReader.close();
    } else if (objectInputFileName.length() == 0) {

        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.setPriors(tempTrain);
        testingEvaluation.setPriors(tempTrain);
        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        OutputStream os = new FileOutputStream(objectOutputFileName);
        if (objectOutputFileName.endsWith(".gz")) {
            os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        objectOutputStream.flush();
        objectOutputStream.close();
    }

    /*   // If classifier is drawable output string describing graph
       if ((classifier instanceof Drawable)
    && (printGraph)) {
    return ((Drawable) classifier).graph();
       }
            
       // Output the classifier as equivalent source
       if ((classifier instanceof Sourcable)
    && (printSource)) {
    return wekaStaticWrapper((Sourcable) classifier, sourceClass);
       }
            
       // Output test instance predictions only
       if (printClassifications) {
    return printClassifications(classifier, new Instances(template, 0),
                                testFileName, classIndex, attributesToOutput);
       }
       */

    // Output model
    if (!(noOutput || printMargins)) {
        if (classifier instanceof OptionHandler) {
            if (schemeOptionsText != null) {
                text.append("\nOptions: " + schemeOptionsText);
                text.append("\n");
            }
        }
        text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
        text.append("\n=== Evaluation Cost Matrix ===\n\n").append(costMatrix.toString());
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.length() != 0)) {

        if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)
                && (costMatrix == null)) {

            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            trainReader = new BufferedReader(new FileReader(trainFileName));

            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            testTimeStart = System.currentTimeMillis();
            while (train.readInstance(trainReader)) {

                trainingEvaluation.evaluateModelOnce((Classifier) classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
            trainReader.close();
        } else {
            testTimeStart = System.currentTimeMillis();
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        }

        // Print the results of the training evaluation
        //  if (printMargins) {
        //      return trainingEvaluation.toCumulativeMarginDistributionString();
        //   } else {
        text.append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2)
                + " seconds");
        text.append("\nTime taken to test model on training data: "
                + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
        text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n",
                printComplexityStatistics));
        if (template.classAttribute().isNominal()) {
            if (classStatistics) {
                text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }
            text.append("\n\n" + trainingEvaluation.toMatrixString());
        }

        //  }
    }

    // Compute proper error estimates
    if (testFileName.length() != 0) {

        // Testing is on the supplied test data
        while (test.readInstance(testReader)) {

            testingEvaluation.evaluateModelOnce((Classifier) classifier, test.instance(0));
            test.delete(0);
        }
        testReader.close();

        text.append("\n\n"
                + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    } else if (trainFileName.length() != 0) {

        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                    printComplexityStatistics));
        } else {
            text.append("\n\n\n" + testingEvaluation
                    .toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().isNominal()) {
        if (classStatistics) {
            text.append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.append("\n\n" + testingEvaluation.toMatrixString());
    }

    String result = "\t" + Utils.doubleToString(trainingEvaluation.pctCorrect(), 12, 4) + " %";
    result += "       " + Utils.doubleToString(testingEvaluation.pctCorrect(), 12, 4) + " %";

    String[] returnString = { text.toString(), result };
    return returnString;
}