Example usage for weka.core Instances classAttribute

Introduction

In this page you can find the example usage for weka.core Instances classAttribute.

Prototype


publicAttribute classAttribute()

Source Link

Document

Returns the class attribute.

Usage

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Prints the predictions for the given dataset into a String variable.
 *//*  w  w w. ja  v  a  2  s . co m*/
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = ((Classifier) classifier).classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}

From source file:general.Util.java

/**
 * show learning statistic result by percentage split
 * @param data training data/*from  w  ww .jav a2  s.  com*/
 * @param trainPercent percentage of the training data
 * @param Classifier model
 */
public static void PercentageSplit(Instances data, double trainPercent, String Classifier) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100);
        int testSize = data.numInstances() - trainSize;

        data.randomize(new Random(1));

        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        switch (Classifier.toLowerCase()) {
        case "naivebayes":
            classifier = new NaiveBayes();
            break;
        case "j48-prune":
            classifier = new MyJ48(true, 0.25f);
            break;
        case "j48-unprune":
            classifier = new MyJ48(false, 0f);
            break;
        case "id3":
            classifier = new MyID3();
            break;
        default:
            break;
        }
        classifier.buildClassifier(train);

        for (int i = 0; i < test.numInstances(); i++) {
            try {
                double pred = classifier.classifyInstance(test.instance(i));
                System.out.print("ID: " + test.instance(i));
                System.out
                        .print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
                System.out.println(", predicted: " + test.classAttribute().value((int) pred));
            } catch (Exception ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        // Start evaluate model using instances test and print results
        try {
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(classifier, test);
            System.out.println(eval.toSummaryString("\nResults\n\n", false));
        } catch (Exception e) {
            e.printStackTrace();
        }

    } catch (Exception ex) {
        Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:general.Util.java

/**
 * Classify test set using pre-build model
 * @param model model pathfile//w ww . j av a 2 s  . c o  m
 * @param test test file
 */
public static void doClassify(Classifier model, Instances test) {
    test.setClassIndex(test.numAttributes() - 1);
    for (int i = 0; i < test.numInstances(); i++) {
        try {
            double pred = model.classifyInstance(test.instance(i));
            System.out.print("ID: " + test.instance(i));
            System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
            System.out.println(", predicted: " + test.classAttribute().value((int) pred));
        } catch (Exception ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {

    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features)
    final Set<String> docIds = new TreeSet<String>();

    for (Document document : documents) {
        docIds.add(document.getName());//from w w w  .  j  a  va 2 s  .c  om
    }

    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));

    // Have to add 1 because SingleIndex.setValue() has a bug, expecting
    // the passed in index to be 1-based rather than 0-based. Why? I have 
    // no idea.
    // Calling path: AddValues.setInputFormat() -->
    //               SingleIndex.setUpper() -->
    //               SingleIndex.setValue()
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));

    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);

    final Instances instances = new Instances(newHeader, documents.size());

    // Map attributes
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();

    final Enumeration<Attribute> en = newHeader.enumerateAttributes();

    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();

        attrMap.put(attr.name(), attr);
    }

    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());

            final Map<String, Object> lineFeatures = line.getFeatures();
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());

            if (line.getLabel() == null) {
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }

            for (Attribute attribute : attrMap.values()) {
                if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) {
                    final String name = attribute.name();
                    final Object obj = lineFeatures.get(name);

                    if (obj instanceof Double) {
                        instance.setValue(attribute, ((Double) obj).doubleValue());
                    } else if (obj instanceof Integer) {
                        instance.setValue(attribute, ((Integer) obj).doubleValue());
                    } else {
                        instance.setValue(attribute, obj.toString());
                    }
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attrMap.size() - 1);

    return instances;
}

From source file:gr.auth.ee.lcs.data.representations.complex.SingleClassRepresentation.java

License:Open Source License

@Override
protected void createClassRepresentation(final Instances instances) {

    if (instances.classIndex() < 0)
        instances.setClassIndex(instances.numAttributes() - 1);

    // Rule Consequents
    final Enumeration<?> classNames = instances.classAttribute().enumerateValues();
    final String[] ruleConsequents = new String[instances.numClasses()];
    this.ruleConsequents = ruleConsequents;
    for (int i = 0; i < instances.numClasses(); i++)
        ruleConsequents[i] = (String) classNames.nextElement();

    attributeList[attributeList.length - 1] = new UniLabel(chromosomeSize, "class", ruleConsequents);

}

From source file:gyc.SMOTEBagging.java

License:Open Source License

/**
 * Bagging method./*  w w w.  j a  v  a  2s.c o  m*/
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];
    int b = 0;
    for (int j = 0; j < m_Classifiers.length; j++) {

        //
        int classNum[] = data.attributeStats(data.classIndex()).nominalCounts;
        int minC, nMin = classNum[0];
        int majC, nMaj = classNum[1];
        if (nMin < nMaj) {
            minC = 0;
            majC = 1;
        } else {
            minC = 1;
            majC = 0;
            nMin = classNum[1];
            nMaj = classNum[0];
        }

        b = b + 10;
        Instances bagData = randomSampling(data, majC, minC, b, random);

        /*      // create the in-bag dataset
              if (m_CalcOutOfBag) {
           inBag[j] = new boolean[data.numInstances()];
           bagData = resampleWithWeights(data, random, inBag[j]);
              } else {
           bagData = data.resampleWithWeights(random);
           if (bagSize < data.numInstances()) {
             bagData.randomize(random);
             Instances newBagData = new Instances(bagData, 0, bagSize);
             bagData = newBagData;
           }
              }
                      
              if (m_Classifier instanceof Randomizable) {
           ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
              }*/

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //classNum=bagData.attributeStats(bagData.classIndex()).nominalCounts;
        //System.out.println("after:"+classNum[0]+"-"+classNum[1]);
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;

                voteCount++;
                double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric)
                    votes[0] += pred;
                else
                    votes[(int) pred]++;
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                vote = Utils.maxIndex(votes); // majority vote
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:ia02classificacao.IA02Classificacao.java

/**
 * @param args the command line arguments
 *//*  w  w  w.ja  va 2s  .  c o m*/
public static void main(String[] args) throws Exception {

    // abre o banco de dados arff e mostra a quantidade de instancias (linhas)
    DataSource arquivo = new DataSource("data/zoo.arff");
    Instances dados = arquivo.getDataSet();
    System.out.println("Instancias lidas: " + dados.numInstances());

    // FILTER: remove o atributo nome do animal da classificao
    String[] parametros = new String[] { "-R", "1" };
    Remove filtro = new Remove();
    filtro.setOptions(parametros);
    filtro.setInputFormat(dados);
    dados = Filter.useFilter(dados, filtro);

    AttributeSelection selAtributo = new AttributeSelection();
    InfoGainAttributeEval avaliador = new InfoGainAttributeEval();
    Ranker busca = new Ranker();
    selAtributo.setEvaluator(avaliador);
    selAtributo.setSearch(busca);
    selAtributo.SelectAttributes(dados);
    int[] indices = selAtributo.selectedAttributes();
    System.out.println("Selected attributes: " + Utils.arrayToString(indices));

    // Usa o algoritimo J48 e mostra a classificao dos dados em forma textual
    String[] opcoes = new String[1];
    opcoes[0] = "-U";
    J48 arvore = new J48();
    arvore.setOptions(opcoes);
    arvore.buildClassifier(dados);
    System.out.println(arvore);

    // Usa o algoritimo J48 e mostra a classificao de dados em forma grafica
    /*
    TreeVisualizer tv = new TreeVisualizer(null, arvore.graph(), new PlaceNode2());
    JFrame frame = new javax.swing.JFrame("?rvore de Conhecimento");
    frame.setSize(800,500);
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    frame.getContentPane().add(tv);
    frame.setVisible(true);
    tv.fitToScreen();
    */

    /*
    * Classificao de novos dados
    */

    System.out.println("\n\nCLASSIFICAO DE NOVOS DADOS");
    // criar atributos
    double[] vals = new double[dados.numAttributes()];
    vals[0] = 1.0; // hair
    vals[1] = 0.0; // feathers
    vals[2] = 0.0; // eggs
    vals[3] = 1.0; // milk
    vals[4] = 1.0; // airborne
    vals[5] = 0.0; // aquatic
    vals[6] = 0.0; // predator
    vals[7] = 1.0; // toothed
    vals[8] = 1.0; // backbone
    vals[9] = 1.0; // breathes
    vals[10] = 0.0; // venomous
    vals[11] = 0.0; // fins
    vals[12] = 4.0; // legs
    vals[13] = 1.0; // tail
    vals[14] = 1.0; // domestic
    vals[15] = 1.0; // catsize

    // Criar uma instncia baseada nestes atributos
    Instance meuUnicornio = new DenseInstance(1.0, vals);

    // Adicionar a instncia nos dados
    meuUnicornio.setDataset(dados);

    // Classificar esta nova instncia
    double label = arvore.classifyInstance(meuUnicornio);

    // Imprimir o resultado da classificao
    System.out.println("Novo Animal: Unicrnio");
    System.out.println("classificacao: " + dados.classAttribute().value((int) label));

    /*
    * Avaliao e predio de erros de mtrica
    */
    System.out.println("\n\nAVALIAO E PREDIO DE ERROS DE MTRICA");
    Classifier cl = new J48();
    Evaluation eval_roc = new Evaluation(dados);
    eval_roc.crossValidateModel(cl, dados, 10, new Random(1), new Object[] {});
    System.out.println(eval_roc.toSummaryString());

    /*
    * Matriz de confuso
    */
    System.out.println("\n\nMATRIZ DE CONFUSO");
    double[][] confusionMatrix = eval_roc.confusionMatrix();
    System.out.println(eval_roc.toMatrixString());

}

From source file:ia03classificador.jFrClassificador.java

public void doClassificate() throws Exception {

    // Quando clicado, a variavel recebe 1, quando no clicado recebe 0
    v00 = ((btn00.isSelected()) ? ((double) 1) : ((double) 0));
    v01 = ((btn01.isSelected()) ? ((double) 1) : ((double) 0));
    v02 = ((btn02.isSelected()) ? ((double) 1) : ((double) 0));
    v03 = ((btn03.isSelected()) ? ((double) 1) : ((double) 0));
    v04 = ((btn04.isSelected()) ? ((double) 1) : ((double) 0));
    v05 = ((btn05.isSelected()) ? ((double) 1) : ((double) 0));
    v06 = ((btn06.isSelected()) ? ((double) 1) : ((double) 0));
    v07 = ((btn07.isSelected()) ? ((double) 1) : ((double) 0));
    v08 = ((btn08.isSelected()) ? ((double) 1) : ((double) 0));
    v09 = ((btn09.isSelected()) ? ((double) 1) : ((double) 0));
    v10 = ((btn10.isSelected()) ? ((double) 1) : ((double) 0));
    v11 = ((btn11.isSelected()) ? ((double) 1) : ((double) 0));
    v13 = ((btn13.isSelected()) ? ((double) 1) : ((double) 0));
    v14 = ((btn14.isSelected()) ? ((double) 1) : ((double) 0));
    v15 = ((btn15.isSelected()) ? ((double) 1) : ((double) 0));
    legs = txtLegs.getText();//from  ww  w  . ja v a 2s  . com
    legs = ((legs == null || legs.trim().isEmpty() ? "2" : legs));
    name = txtName.getText();

    // abre o banco de dados arff e guarda os registros no objeto dados
    ConverterUtils.DataSource arquivo = new ConverterUtils.DataSource("data/zoo.arff");
    Instances dados = arquivo.getDataSet();

    // FILTER: remove o atributo nome do animal da classificao
    String[] parametros = new String[] { "-R", "1" };
    Remove filtro = new Remove();
    filtro.setOptions(parametros);
    filtro.setInputFormat(dados);
    dados = Filter.useFilter(dados, filtro);

    AttributeSelection selAtributo = new AttributeSelection();
    InfoGainAttributeEval avaliador = new InfoGainAttributeEval();
    Ranker busca = new Ranker();
    selAtributo.setEvaluator(avaliador);
    selAtributo.setSearch(busca);
    selAtributo.SelectAttributes(dados);
    int[] indices = selAtributo.selectedAttributes();
    //System.out.println("Selected attributes: " + Utils.arrayToString(indices));

    // Usa o algoritimo J48 para montar a arvore de dados
    String[] opcoes = new String[1];
    opcoes[0] = "-U";
    J48 arvore = new J48();
    arvore.setOptions(opcoes);
    arvore.buildClassifier(dados);

    // cria o novo elemento para comparao
    double[] vals = new double[dados.numAttributes()];
    vals[0] = v00; // hair
    vals[1] = v01; // feathers
    vals[2] = v02; // eggs
    vals[3] = v03; // milk
    vals[4] = v04; // airborne
    vals[5] = v05; // aquatic
    vals[6] = v06; // predator
    vals[7] = v07; // toothed
    vals[8] = v08; // backbone
    vals[9] = v09; // breathes
    vals[10] = v10; // venomous
    vals[11] = v11; // fins
    vals[12] = Double.parseDouble(legs); // legs
    vals[13] = v13; // tail
    vals[14] = v14; // domestic
    vals[15] = v15; // catsize

    // Criar uma instncia baseada nestes atributos
    Instance newAnimal = new DenseInstance(1.0, vals);

    // Adicionar a instncia nos dados
    newAnimal.setDataset(dados);

    // Classificar esta nova instncia
    double label = arvore.classifyInstance(newAnimal);

    // Imprimir o resultado da classificao
    lblClassification.setText(dados.classAttribute().value((int) label));

}

From source file:id3.MyID3.java

/**
 * Algoritma pohon keputusan//from   w  w  w . ja v a2  s  .  co m
 * @param instances data train
 * @param attributes remaining attributes
 * @throws Exception
 */
public void buildMyID3(Instances instances, ArrayList<Attribute> attributes) throws Exception {
    // Check if no instances have reached this node.
    if (instances.numInstances() == 0) {
        classAttribute = null;
        classLabel = Instance.missingValue();
        classDistributionAmongInstances = new double[instances.numClasses()];
        return;
    }
    // Check if all instances only contain one class label
    if (computeEntropy(instances) == 0) {
        currentAttribute = null;
        classDistributionAmongInstances = classDistribution(instances);
        // Labelling process at node
        for (int i = 0; i < classDistributionAmongInstances.length; i++) {
            if (classDistributionAmongInstances[i] > 0) {
                classLabel = i;
                break;
            }
        }
        classAttribute = instances.classAttribute();
        Utils.normalize(classDistributionAmongInstances);
    } else {
        // Compute infogain for each attribute
        double[] infoGainAttribute = new double[instances.numAttributes()];
        for (int i = 0; i < instances.numAttributes(); i++) {
            infoGainAttribute[i] = computeIG(instances, instances.attribute(i));
        }
        // Choose attribute with maximum information gain
        int indexMaxInfoGain = 0;
        double maximumInfoGain = 0.0;
        for (int i = 0; i < (infoGainAttribute.length - 1); i++) {
            if (infoGainAttribute[i] > maximumInfoGain) {
                maximumInfoGain = infoGainAttribute[i];
                indexMaxInfoGain = i;
            }
        }
        currentAttribute = instances.attribute(indexMaxInfoGain);
        // Delete current attribute from remaining attribute
        ArrayList<Attribute> remainingAttributes = attributes;
        if (!remainingAttributes.isEmpty()) {
            int indexAttributeDeleted = 0;
            for (int i = 0; i < remainingAttributes.size(); i++) {
                if (remainingAttributes.get(i).index() == currentAttribute.index()) {
                    indexAttributeDeleted = i;
                }
            }
            remainingAttributes.remove(indexAttributeDeleted);
        }
        // Split instances based on currentAttribute (create branch new node)
        Instances[] instancesSplitBasedAttribute = splitData(instances, currentAttribute);
        subTree = new MyID3[currentAttribute.numValues()];
        for (int i = 0; i < currentAttribute.numValues(); i++) {
            if (instancesSplitBasedAttribute[i].numInstances() == 0) {
                // Handle empty examples at nodes
                double[] currentClassDistribution = classDistribution(instances);
                classLabel = 0.0;
                double counterDistribution = 0.0;
                for (int j = 0; j < currentClassDistribution.length; j++) {
                    if (currentClassDistribution[j] > counterDistribution) {
                        classLabel = j;
                    }
                }
                classAttribute = instances.classAttribute();
            } else {
                subTree[i] = new MyID3();
                subTree[i].buildMyID3(instancesSplitBasedAttribute[i], remainingAttributes);
            }
        }
    }
}

From source file:id3j48.WekaAccess.java

public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    input.setClassIndex(input.numAttributes() - 1);
    for (int i = 0; i < input.numInstances(); i++) {
        double classLabel = classifier.classifyInstance(input.instance(i));
        input.instance(i).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(i));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
    }/*from www  .j a v a  2s .c  om*/

    try (BufferedWriter writer = new BufferedWriter(
            new FileWriter(classifiedFolder + File.separator + filename))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}