List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Prints the predictions for the given dataset into a String variable.
 *
 * <p>The test file is read incrementally: one instance is loaded, classified,
 * written to the output and then deleted again before the next one is read.
 * Each output line starts with the running instance number and the prediction
 * (or {@code missing}), followed by the actual class information and the
 * attribute values selected by {@code attributesToOutput}.
 *
 * @param classifier         the classifier used to predict each test instance
 * @param train              the training data (not read by this method)
 * @param testFileName       name of the test file; an empty name yields an empty result
 * @param classIndex         1-based index of the class attribute, or -1 to use the last attribute
 * @param attributesToOutput range handed to {@code attributeValuesString} for every instance
 * @return the predictions, one line per test instance
 * @throws Exception if the test file cannot be opened or read, or if classification fails
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuilder text = new StringBuilder();
    if (testFileName.length() == 0) {
        return text.toString();
    }

    BufferedReader opened;
    try {
        opened = new BufferedReader(new FileReader(testFileName));
    } catch (Exception e) {
        // Same message as before, but the original exception is kept as the cause.
        throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }

    // try-with-resources: the reader is closed even when reading or classifying fails.
    try (BufferedReader testReader = opened) {
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }

        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = classifier.classifyInstance(withMissing);
            boolean predictionMissing = Instance.isMissingValue(predValue);

            if (test.classAttribute().isNumeric()) {
                // numeric class: "<i> <predicted> <actual> <attribute values>"
                if (predictionMissing) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                // nominal class: "<i> <predicted label> <probability> <actual label> <attribute values>"
                if (predictionMissing) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (predictionMissing) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }

            // Drop the processed instance so only one is held in memory at a time.
            test.delete(0);
            i++;
        }
    }
    return text.toString();
}
From source file:general.Util.java
/** * show learning statistic result by percentage split * @param data training data/*from w ww .jav a2 s. com*/ * @param trainPercent percentage of the training data * @param Classifier model */ public static void PercentageSplit(Instances data, double trainPercent, String Classifier) { try { int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100); int testSize = data.numInstances() - trainSize; data.randomize(new Random(1)); Instances train = new Instances(data, 0, trainSize); Instances test = new Instances(data, trainSize, testSize); train.setClassIndex(train.numAttributes() - 1); test.setClassIndex(test.numAttributes() - 1); switch (Classifier.toLowerCase()) { case "naivebayes": classifier = new NaiveBayes(); break; case "j48-prune": classifier = new MyJ48(true, 0.25f); break; case "j48-unprune": classifier = new MyJ48(false, 0f); break; case "id3": classifier = new MyID3(); break; default: break; } classifier.buildClassifier(train); for (int i = 0; i < test.numInstances(); i++) { try { double pred = classifier.classifyInstance(test.instance(i)); System.out.print("ID: " + test.instance(i)); System.out .print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue())); System.out.println(", predicted: " + test.classAttribute().value((int) pred)); } catch (Exception ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } } // Start evaluate model using instances test and print results try { Evaluation eval = new Evaluation(train); eval.evaluateModel(classifier, test); System.out.println(eval.toSummaryString("\nResults\n\n", false)); } catch (Exception e) { e.printStackTrace(); } } catch (Exception ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:general.Util.java
/**
 * Classifies a test set with an already-built model and prints, for every
 * instance, the instance itself together with its actual and predicted
 * class label. A failure on one instance is logged and the loop continues.
 *
 * @param model the trained classifier used to label the instances
 * @param test  the test set; its last attribute is set as the class attribute
 */
public static void doClassify(Classifier model, Instances test) {
    final int lastAttribute = test.numAttributes() - 1;
    test.setClassIndex(lastAttribute);

    int row = 0;
    while (row < test.numInstances()) {
        try {
            final double predicted = model.classifyInstance(test.instance(row));
            System.out.print("ID: " + test.instance(row));

            final int actualIndex = (int) test.instance(row).classValue();
            System.out.print(", actual: " + test.classAttribute().value(actualIndex));

            final int predictedIndex = (int) predicted;
            System.out.println(", predicted: " + test.classAttribute().value(predictedIndex));
        } catch (Exception ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
        row++;
    }
}
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
@SuppressWarnings("unchecked") public static Instances createDataset(final Instances header, final Collection<Document> documents) throws Exception { // Update header to include all docIDs from the passed in documents // (Weka requires all values for nominal features) final Set<String> docIds = new TreeSet<String>(); for (Document document : documents) { docIds.add(document.getName());//from w w w . j a va 2 s .c om } final AddValues avf = new AddValues(); avf.setLabels(StringUtils.join(docIds, ",")); // Have to add 1 because SingleIndex.setValue() has a bug, expecting // the passed in index to be 1-based rather than 0-based. Why? I have // no idea. // Calling path: AddValues.setInputFormat() --> // SingleIndex.setUpper() --> // SingleIndex.setValue() avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1)); avf.setInputFormat(header); final Instances newHeader = Filter.useFilter(header, avf); final Instances instances = new Instances(newHeader, documents.size()); // Map attributes final Map<String, Attribute> attrMap = new HashMap<String, Attribute>(); final Enumeration<Attribute> en = newHeader.enumerateAttributes(); while (en.hasMoreElements()) { final Attribute attr = en.nextElement(); attrMap.put(attr.name(), attr); } attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute()); final Attribute docId = attrMap.get(DOC_ID); final Attribute lineId = attrMap.get(LINE_ID); final Attribute classAttr = attrMap.get(LABEL); // Add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attrMap.size()); final Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); if (line.getLabel() == null) { instance.setMissing(classAttr); } else { instance.setValue(classAttr, line.getLabel()); } 
for (Attribute attribute : attrMap.values()) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attrMap.size() - 1); return instances; }
From source file:gr.auth.ee.lcs.data.representations.complex.SingleClassRepresentation.java
License:Open Source License
@Override protected void createClassRepresentation(final Instances instances) { if (instances.classIndex() < 0) instances.setClassIndex(instances.numAttributes() - 1); // Rule Consequents final Enumeration<?> classNames = instances.classAttribute().enumerateValues(); final String[] ruleConsequents = new String[instances.numClasses()]; this.ruleConsequents = ruleConsequents; for (int i = 0; i < instances.numClasses(); i++) ruleConsequents[i] = (String) classNames.nextElement(); attributeList[attributeList.length - 1] = new UniLabel(chromosomeSize, "class", ruleConsequents); }
From source file:gyc.SMOTEBagging.java
License:Open Source License
/** * Bagging method./* w w w. j a v a 2s.c o m*/ * * @param data the training data to be used for generating the * bagged classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); super.buildClassifier(data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } int bagSize = data.numInstances() * m_BagSizePercent / 100; Random random = new Random(m_Seed); boolean[][] inBag = null; if (m_CalcOutOfBag) inBag = new boolean[m_Classifiers.length][]; int b = 0; for (int j = 0; j < m_Classifiers.length; j++) { // int classNum[] = data.attributeStats(data.classIndex()).nominalCounts; int minC, nMin = classNum[0]; int majC, nMaj = classNum[1]; if (nMin < nMaj) { minC = 0; majC = 1; } else { minC = 1; majC = 0; nMin = classNum[1]; nMaj = classNum[0]; } b = b + 10; Instances bagData = randomSampling(data, majC, minC, b, random); /* // create the in-bag dataset if (m_CalcOutOfBag) { inBag[j] = new boolean[data.numInstances()]; bagData = resampleWithWeights(data, random, inBag[j]); } else { bagData = data.resampleWithWeights(random); if (bagSize < data.numInstances()) { bagData.randomize(random); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } if (m_Classifier instanceof Randomizable) { ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); }*/ // build the classifier m_Classifiers[j].buildClassifier(bagData); //classNum=bagData.attributeStats(bagData.classIndex()).nominalCounts; //System.out.println("after:"+classNum[0]+"-"+classNum[1]); } // calc OOB error? 
if (getCalcOutOfBag()) { double outOfBagCount = 0.0; double errorSum = 0.0; boolean numeric = data.classAttribute().isNumeric(); for (int i = 0; i < data.numInstances(); i++) { double vote; double[] votes; if (numeric) votes = new double[1]; else votes = new double[data.numClasses()]; // determine predictions for instance int voteCount = 0; for (int j = 0; j < m_Classifiers.length; j++) { if (inBag[j][i]) continue; voteCount++; double pred = m_Classifiers[j].classifyInstance(data.instance(i)); if (numeric) votes[0] += pred; else votes[(int) pred]++; } // "vote" if (numeric) { vote = votes[0]; if (voteCount > 0) { vote /= voteCount; // average } } else { vote = Utils.maxIndex(votes); // majority vote } // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else { if (vote != data.instance(i).classValue()) errorSum += data.instance(i).weight(); } } m_OutOfBagError = errorSum / outOfBagCount; } else { m_OutOfBagError = 0; } }
From source file:ia02classificacao.IA02Classificacao.java
/** * @param args the command line arguments *//* w w w.ja va 2s . c o m*/ public static void main(String[] args) throws Exception { // abre o banco de dados arff e mostra a quantidade de instancias (linhas) DataSource arquivo = new DataSource("data/zoo.arff"); Instances dados = arquivo.getDataSet(); System.out.println("Instancias lidas: " + dados.numInstances()); // FILTER: remove o atributo nome do animal da classificao String[] parametros = new String[] { "-R", "1" }; Remove filtro = new Remove(); filtro.setOptions(parametros); filtro.setInputFormat(dados); dados = Filter.useFilter(dados, filtro); AttributeSelection selAtributo = new AttributeSelection(); InfoGainAttributeEval avaliador = new InfoGainAttributeEval(); Ranker busca = new Ranker(); selAtributo.setEvaluator(avaliador); selAtributo.setSearch(busca); selAtributo.SelectAttributes(dados); int[] indices = selAtributo.selectedAttributes(); System.out.println("Selected attributes: " + Utils.arrayToString(indices)); // Usa o algoritimo J48 e mostra a classificao dos dados em forma textual String[] opcoes = new String[1]; opcoes[0] = "-U"; J48 arvore = new J48(); arvore.setOptions(opcoes); arvore.buildClassifier(dados); System.out.println(arvore); // Usa o algoritimo J48 e mostra a classificao de dados em forma grafica /* TreeVisualizer tv = new TreeVisualizer(null, arvore.graph(), new PlaceNode2()); JFrame frame = new javax.swing.JFrame("?rvore de Conhecimento"); frame.setSize(800,500); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); frame.getContentPane().add(tv); frame.setVisible(true); tv.fitToScreen(); */ /* * Classificao de novos dados */ System.out.println("\n\nCLASSIFICAO DE NOVOS DADOS"); // criar atributos double[] vals = new double[dados.numAttributes()]; vals[0] = 1.0; // hair vals[1] = 0.0; // feathers vals[2] = 0.0; // eggs vals[3] = 1.0; // milk vals[4] = 1.0; // airborne vals[5] = 0.0; // aquatic vals[6] = 0.0; // predator vals[7] = 1.0; // toothed vals[8] = 1.0; // backbone vals[9] = 
1.0; // breathes vals[10] = 0.0; // venomous vals[11] = 0.0; // fins vals[12] = 4.0; // legs vals[13] = 1.0; // tail vals[14] = 1.0; // domestic vals[15] = 1.0; // catsize // Criar uma instncia baseada nestes atributos Instance meuUnicornio = new DenseInstance(1.0, vals); // Adicionar a instncia nos dados meuUnicornio.setDataset(dados); // Classificar esta nova instncia double label = arvore.classifyInstance(meuUnicornio); // Imprimir o resultado da classificao System.out.println("Novo Animal: Unicrnio"); System.out.println("classificacao: " + dados.classAttribute().value((int) label)); /* * Avaliao e predio de erros de mtrica */ System.out.println("\n\nAVALIAO E PREDIO DE ERROS DE MTRICA"); Classifier cl = new J48(); Evaluation eval_roc = new Evaluation(dados); eval_roc.crossValidateModel(cl, dados, 10, new Random(1), new Object[] {}); System.out.println(eval_roc.toSummaryString()); /* * Matriz de confuso */ System.out.println("\n\nMATRIZ DE CONFUSO"); double[][] confusionMatrix = eval_roc.confusionMatrix(); System.out.println(eval_roc.toMatrixString()); }
From source file:ia03classificador.jFrClassificador.java
public void doClassificate() throws Exception { // Quando clicado, a variavel recebe 1, quando no clicado recebe 0 v00 = ((btn00.isSelected()) ? ((double) 1) : ((double) 0)); v01 = ((btn01.isSelected()) ? ((double) 1) : ((double) 0)); v02 = ((btn02.isSelected()) ? ((double) 1) : ((double) 0)); v03 = ((btn03.isSelected()) ? ((double) 1) : ((double) 0)); v04 = ((btn04.isSelected()) ? ((double) 1) : ((double) 0)); v05 = ((btn05.isSelected()) ? ((double) 1) : ((double) 0)); v06 = ((btn06.isSelected()) ? ((double) 1) : ((double) 0)); v07 = ((btn07.isSelected()) ? ((double) 1) : ((double) 0)); v08 = ((btn08.isSelected()) ? ((double) 1) : ((double) 0)); v09 = ((btn09.isSelected()) ? ((double) 1) : ((double) 0)); v10 = ((btn10.isSelected()) ? ((double) 1) : ((double) 0)); v11 = ((btn11.isSelected()) ? ((double) 1) : ((double) 0)); v13 = ((btn13.isSelected()) ? ((double) 1) : ((double) 0)); v14 = ((btn14.isSelected()) ? ((double) 1) : ((double) 0)); v15 = ((btn15.isSelected()) ? ((double) 1) : ((double) 0)); legs = txtLegs.getText();//from ww w . ja v a 2s . com legs = ((legs == null || legs.trim().isEmpty() ? 
"2" : legs)); name = txtName.getText(); // abre o banco de dados arff e guarda os registros no objeto dados ConverterUtils.DataSource arquivo = new ConverterUtils.DataSource("data/zoo.arff"); Instances dados = arquivo.getDataSet(); // FILTER: remove o atributo nome do animal da classificao String[] parametros = new String[] { "-R", "1" }; Remove filtro = new Remove(); filtro.setOptions(parametros); filtro.setInputFormat(dados); dados = Filter.useFilter(dados, filtro); AttributeSelection selAtributo = new AttributeSelection(); InfoGainAttributeEval avaliador = new InfoGainAttributeEval(); Ranker busca = new Ranker(); selAtributo.setEvaluator(avaliador); selAtributo.setSearch(busca); selAtributo.SelectAttributes(dados); int[] indices = selAtributo.selectedAttributes(); //System.out.println("Selected attributes: " + Utils.arrayToString(indices)); // Usa o algoritimo J48 para montar a arvore de dados String[] opcoes = new String[1]; opcoes[0] = "-U"; J48 arvore = new J48(); arvore.setOptions(opcoes); arvore.buildClassifier(dados); // cria o novo elemento para comparao double[] vals = new double[dados.numAttributes()]; vals[0] = v00; // hair vals[1] = v01; // feathers vals[2] = v02; // eggs vals[3] = v03; // milk vals[4] = v04; // airborne vals[5] = v05; // aquatic vals[6] = v06; // predator vals[7] = v07; // toothed vals[8] = v08; // backbone vals[9] = v09; // breathes vals[10] = v10; // venomous vals[11] = v11; // fins vals[12] = Double.parseDouble(legs); // legs vals[13] = v13; // tail vals[14] = v14; // domestic vals[15] = v15; // catsize // Criar uma instncia baseada nestes atributos Instance newAnimal = new DenseInstance(1.0, vals); // Adicionar a instncia nos dados newAnimal.setDataset(dados); // Classificar esta nova instncia double label = arvore.classifyInstance(newAnimal); // Imprimir o resultado da classificao lblClassification.setText(dados.classAttribute().value((int) label)); }
From source file:id3.MyID3.java
/** * Algoritma pohon keputusan//from w w w . ja v a2 s . co m * @param instances data train * @param attributes remaining attributes * @throws Exception */ public void buildMyID3(Instances instances, ArrayList<Attribute> attributes) throws Exception { // Check if no instances have reached this node. if (instances.numInstances() == 0) { classAttribute = null; classLabel = Instance.missingValue(); classDistributionAmongInstances = new double[instances.numClasses()]; return; } // Check if all instances only contain one class label if (computeEntropy(instances) == 0) { currentAttribute = null; classDistributionAmongInstances = classDistribution(instances); // Labelling process at node for (int i = 0; i < classDistributionAmongInstances.length; i++) { if (classDistributionAmongInstances[i] > 0) { classLabel = i; break; } } classAttribute = instances.classAttribute(); Utils.normalize(classDistributionAmongInstances); } else { // Compute infogain for each attribute double[] infoGainAttribute = new double[instances.numAttributes()]; for (int i = 0; i < instances.numAttributes(); i++) { infoGainAttribute[i] = computeIG(instances, instances.attribute(i)); } // Choose attribute with maximum information gain int indexMaxInfoGain = 0; double maximumInfoGain = 0.0; for (int i = 0; i < (infoGainAttribute.length - 1); i++) { if (infoGainAttribute[i] > maximumInfoGain) { maximumInfoGain = infoGainAttribute[i]; indexMaxInfoGain = i; } } currentAttribute = instances.attribute(indexMaxInfoGain); // Delete current attribute from remaining attribute ArrayList<Attribute> remainingAttributes = attributes; if (!remainingAttributes.isEmpty()) { int indexAttributeDeleted = 0; for (int i = 0; i < remainingAttributes.size(); i++) { if (remainingAttributes.get(i).index() == currentAttribute.index()) { indexAttributeDeleted = i; } } remainingAttributes.remove(indexAttributeDeleted); } // Split instances based on currentAttribute (create branch new node) Instances[] instancesSplitBasedAttribute 
= splitData(instances, currentAttribute); subTree = new MyID3[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { if (instancesSplitBasedAttribute[i].numInstances() == 0) { // Handle empty examples at nodes double[] currentClassDistribution = classDistribution(instances); classLabel = 0.0; double counterDistribution = 0.0; for (int j = 0; j < currentClassDistribution.length; j++) { if (currentClassDistribution[j] > counterDistribution) { classLabel = j; } } classAttribute = instances.classAttribute(); } else { subTree[i] = new MyID3(); subTree[i].buildMyID3(instancesSplitBasedAttribute[i], remainingAttributes); } } } }
From source file:id3j48.WekaAccess.java
/**
 * Classifies every instance of an ARFF file and writes the labelled data out.
 *
 * <p>The last attribute is used as the class. Each instance gets its class
 * value overwritten with the prediction and is printed together with the
 * predicted label. The resulting dataset is then written to a file of the
 * same name inside {@code classifiedFolder}.
 *
 * @param filename   name of the ARFF file to read; also used as the output file name
 * @param classifier the classifier used to label the instances
 * @throws Exception if reading, classifying or writing fails
 */
public static void classify(String filename, Classifier classifier) throws Exception {
    final Instances input = readArff(filename);
    final int lastAttribute = input.numAttributes() - 1;
    input.setClassIndex(lastAttribute);

    int row = 0;
    while (row < input.numInstances()) {
        final double classLabel = classifier.classifyInstance(input.instance(row));
        input.instance(row).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(row));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
        row++;
    }

    final String outputPath = classifiedFolder + File.separator + filename;
    try (BufferedWriter out = new BufferedWriter(new FileWriter(outputPath))) {
        out.write(input.toString());
        out.newLine();
        out.flush();
    }
}