List of usage examples for weka.core Instances setClassIndex
public void setClassIndex(int classIndex)
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Prints the predictions for the given dataset into a String variable.
 *
 * <p>Instances are read one at a time from {@code testFileName}, classified, and
 * removed again, so only a single instance is held in the dataset at any moment.
 * One line is emitted per instance: the running index, the prediction, the
 * actual class value and the output of {@code attributeValuesString}.
 *
 * @param classifier the classifier used to produce the predictions
 * @param train not read by this method
 * @param testFileName path of the test file; an empty string yields an empty result
 * @param classIndex 1-based index of the class attribute, or -1 to use the last attribute
 * @param attributesToOutput range handed to {@code attributeValuesString} for each instance
 * @return the formatted predictions, one line per test instance
 * @throws Exception if the file cannot be opened, or if reading or classifying fails
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() == 0) {
        return text.toString();
    }

    BufferedReader testReader;
    try {
        testReader = new BufferedReader(new FileReader(testFileName));
    } catch (Exception e) {
        // Same message as before, but keep the original exception as the cause.
        throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }

    // try/finally so the reader is released even when reading or classifying throws.
    try {
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }

        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = classifier.classifyInstance(withMissing);
            boolean predMissing = Instance.isMissingValue(predValue);

            if (test.classAttribute().isNumeric()) {
                // Numeric class: index, predicted value, actual value.
                if (predMissing) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                // Nominal class: index, predicted label, probability of that label, actual label.
                if (predMissing) {
                    text.append(i + " missing ");
                    text.append("missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }

            // Drop the instance just processed so the dataset never grows.
            test.delete(0);
            i++;
        }
    } finally {
        testReader.close();
    }
    return text.toString();
}
From source file:general.Util.java
/** * show learning statistic result by using test sets * @param testPath test path file/*from w ww .ja v a 2 s . co m*/ * @param typeTestFile test file */ public static void TestSchema(String testPath, String typeTestFile) { Instances testsets = null; // Load test instances based on file type and path if (typeTestFile.equals("arff")) { FileReader file = null; try { file = new FileReader(testPath); try (BufferedReader reader = new BufferedReader(file)) { testsets = new Instances(reader); } // setting class attribute testsets.setClassIndex(data.numAttributes() - 1); } catch (IOException ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (file != null) { file.close(); } } catch (IOException ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } } } else if (typeTestFile.equals("csv")) { try { CSVLoader csv = new CSVLoader(); csv.setFile(new File(testPath)); data = csv.getDataSet(); // setting class attribute data.setClassIndex(data.numAttributes() - 1); } catch (IOException ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } } // Start evaluate model using instances test and print results try { Evaluation eval = new Evaluation(Util.getData()); eval.evaluateModel(Util.getClassifier(), testsets); System.out.println(eval.toSummaryString("\nResults\n\n", false)); } catch (Exception e) { e.printStackTrace(); } }
From source file:general.Util.java
/** * show learning statistic result by percentage split * @param data training data/*from w w w .jav a 2 s.c om*/ * @param trainPercent percentage of the training data * @param Classifier model */ public static void PercentageSplit(Instances data, double trainPercent, String Classifier) { try { int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100); int testSize = data.numInstances() - trainSize; data.randomize(new Random(1)); Instances train = new Instances(data, 0, trainSize); Instances test = new Instances(data, trainSize, testSize); train.setClassIndex(train.numAttributes() - 1); test.setClassIndex(test.numAttributes() - 1); switch (Classifier.toLowerCase()) { case "naivebayes": classifier = new NaiveBayes(); break; case "j48-prune": classifier = new MyJ48(true, 0.25f); break; case "j48-unprune": classifier = new MyJ48(false, 0f); break; case "id3": classifier = new MyID3(); break; default: break; } classifier.buildClassifier(train); for (int i = 0; i < test.numInstances(); i++) { try { double pred = classifier.classifyInstance(test.instance(i)); System.out.print("ID: " + test.instance(i)); System.out .print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue())); System.out.println(", predicted: " + test.classAttribute().value((int) pred)); } catch (Exception ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } } // Start evaluate model using instances test and print results try { Evaluation eval = new Evaluation(train); eval.evaluateModel(classifier, test); System.out.println(eval.toSummaryString("\nResults\n\n", false)); } catch (Exception e) { e.printStackTrace(); } } catch (Exception ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:general.Util.java
/** * Classify test set using pre-build model * @param model model pathfile/*from w ww .j av a 2s .c o m*/ * @param test test file */ public static void doClassify(Classifier model, Instances test) { test.setClassIndex(test.numAttributes() - 1); for (int i = 0; i < test.numInstances(); i++) { try { double pred = model.classifyInstance(test.instance(i)); System.out.print("ID: " + test.instance(i)); System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue())); System.out.println(", predicted: " + test.classAttribute().value((int) pred)); } catch (Exception ex) { Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
public static Instances createDataset(final Collection<Document> documents) { // Key = feature name | Value = number representing NUMERIC, NOMINAL, etc. final Map<String, Integer> featureType = new TreeMap<String, Integer>(); // Key = feature name | Values = distinct values for NOMINAL values final Map<String, Set<String>> nominalFeatureMap = new HashMap<String, Set<String>>(); final Set<String> labels = new TreeSet<String>(); final Set<String> docIds = new TreeSet<String>(); // First scan -- determine attribute values for (Document document : documents) { processFeatures(document.getFeatures(), featureType, nominalFeatureMap); docIds.add(document.getName());/*from www. ja va 2s . c om*/ for (Line line : document.getLines()) { processFeatures(line.getFeatures(), featureType, nominalFeatureMap); labels.add(line.getLabel()); } } final ArrayList<Attribute> attributes = new ArrayList<Attribute>(); // Add Document and Line IDs as first two attributes //final Attribute docId = new Attribute(DOC_ID, (ArrayList<String>) null); final Attribute docId = new Attribute(DOC_ID, new ArrayList<String>(docIds)); final Attribute lineId = new Attribute(LINE_ID); attributes.add(docId); attributes.add(lineId); // Build attributes for (String feature : featureType.keySet()) { final int type = featureType.get(feature); if (type == Attribute.NUMERIC) { attributes.add(new Attribute(feature)); } else { if (nominalFeatureMap.containsKey(feature)) { attributes.add(new Attribute(feature, new ArrayList<String>(nominalFeatureMap.get(feature)))); } } } // Add class attribute Attribute classAttr = new Attribute(LABEL, new ArrayList<String>(labels)); attributes.add(classAttr); final Instances instances = new Instances("train", attributes, documents.size()); // Second scan -- add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attributes.size()); final 
Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); instance.setValue(classAttr, line.getLabel()); for (Attribute attribute : attributes) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attributes.size() - 1); return instances; }
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
@SuppressWarnings("unchecked") public static Instances createDataset(final Instances header, final Collection<Document> documents) throws Exception { // Update header to include all docIDs from the passed in documents // (Weka requires all values for nominal features) final Set<String> docIds = new TreeSet<String>(); for (Document document : documents) { docIds.add(document.getName());//from ww w .ja va 2s.co m } final AddValues avf = new AddValues(); avf.setLabels(StringUtils.join(docIds, ",")); // Have to add 1 because SingleIndex.setValue() has a bug, expecting // the passed in index to be 1-based rather than 0-based. Why? I have // no idea. // Calling path: AddValues.setInputFormat() --> // SingleIndex.setUpper() --> // SingleIndex.setValue() avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1)); avf.setInputFormat(header); final Instances newHeader = Filter.useFilter(header, avf); final Instances instances = new Instances(newHeader, documents.size()); // Map attributes final Map<String, Attribute> attrMap = new HashMap<String, Attribute>(); final Enumeration<Attribute> en = newHeader.enumerateAttributes(); while (en.hasMoreElements()) { final Attribute attr = en.nextElement(); attrMap.put(attr.name(), attr); } attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute()); final Attribute docId = attrMap.get(DOC_ID); final Attribute lineId = attrMap.get(LINE_ID); final Attribute classAttr = attrMap.get(LABEL); // Add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attrMap.size()); final Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); if (line.getLabel() == null) { instance.setMissing(classAttr); } else { instance.setValue(classAttr, line.getLabel()); } for 
(Attribute attribute : attrMap.values()) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attrMap.size() - 1); return instances; }
From source file:gr.auth.ee.lcs.ArffTrainTestLoader.java
License:Open Source License
/** * Load instances into the global train store and create test set. * /* w ww.j a va 2 s. c om*/ * @param filename * the .arff filename to be used * @param generateTestSet * true if a test set is going to be generated * @throws IOException * if the input file is not found */ public final void loadInstances(final String filename, final boolean generateTestSet) throws IOException { // Open .arff final Instances set = InstancesUtility.openInstance(filename); if (set.classIndex() < 0) { set.setClassIndex(set.numAttributes() - 1); } set.randomize(new Random()); if (generateTestSet) { final int numOfFolds = (int) SettingsLoader.getNumericSetting("NumberOfFolds", 10); final int fold = (int) Math.floor(Math.random() * numOfFolds); trainSet = set.trainCV(numOfFolds, fold); testSet = set.testCV(numOfFolds, fold); } else { trainSet = set; } myLcs.instances = InstancesUtility.convertIntancesToDouble(trainSet); myLcs.labelCardinality = InstancesUtility.getLabelCardinality(trainSet); }
From source file:gr.auth.ee.lcs.ArffTrainTestLoader.java
License:Open Source License
/** * Load instances into the global train store and create test set. * //w ww. j ava 2 s .com * @param filename * the .arff filename to be used * @param testFile * the test file to be loaded * @throws IOException * if the input file is not found */ public final void loadInstancesWithTest(final String filename, final String testFile) throws IOException { // Open .arff final Instances set = InstancesUtility.openInstance(filename); if (set.classIndex() < 0) set.setClassIndex(set.numAttributes() - 1); set.randomize(new Random()); trainSet = set; myLcs.instances = InstancesUtility.convertIntancesToDouble(trainSet); myLcs.labelCardinality = InstancesUtility.getLabelCardinality(trainSet); testSet = InstancesUtility.openInstance(testFile); myLcs.trainSet = trainSet; myLcs.testSet = testSet; myLcs.testInstances = InstancesUtility.convertIntancesToDouble(testSet); System.out.println("Label cardinality: " + myLcs.labelCardinality); }
From source file:gr.auth.ee.lcs.data.representations.complex.SingleClassRepresentation.java
License:Open Source License
@Override protected void createClassRepresentation(final Instances instances) { if (instances.classIndex() < 0) instances.setClassIndex(instances.numAttributes() - 1); // Rule Consequents final Enumeration<?> classNames = instances.classAttribute().enumerateValues(); final String[] ruleConsequents = new String[instances.numClasses()]; this.ruleConsequents = ruleConsequents; for (int i = 0; i < instances.numClasses(); i++) ruleConsequents[i] = (String) classNames.nextElement(); attributeList[attributeList.length - 1] = new UniLabel(chromosomeSize, "class", ruleConsequents); }
From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java
License:Open Source License
/** * Splits the .arff input dataset to |number-of-distinct-label-combinations| Instances which are stored in the partitions[] array. * Called by initializePopulation() as a preparatory step to clustering. * @throws Exception /*www. j ava 2 s . c o m*/ * * */ public static Instances[] partitionInstances(final AbstractLearningClassifierSystem lcs, final String filename) throws Exception { // Open .arff final Instances set = InstancesUtility.openInstance(filename); if (set.classIndex() < 0) { set.setClassIndex(set.numAttributes() - 1); } //set.randomize(new Random()); int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1); // the partitions vector holds the indices String stringsArray[] = new String[lcs.instances.length]; int indicesArray[] = new int[lcs.instances.length]; // convert each instance's labelset into a string and store it in the stringsArray array for (int i = 0; i < set.numInstances(); i++) { stringsArray[i] = ""; indicesArray[i] = i; for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) { stringsArray[i] += (int) set.instance(i).value(j); } } // contains the indicesVector(s) Vector<Vector> mothershipVector = new Vector<Vector>(); String baseString = ""; for (int i = 0; i < set.numInstances(); i++) { baseString = stringsArray[i]; if (baseString.equals("")) continue; Vector<Integer> indicesVector = new Vector<Integer>(); for (int j = 0; j < set.numInstances(); j++) { if (baseString.equals(stringsArray[j])) { stringsArray[j] = ""; indicesVector.add(j); } } mothershipVector.add(indicesVector); } Instances[] partitions = new Instances[mothershipVector.size()]; for (int i = 0; i < mothershipVector.size(); i++) { partitions[i] = new Instances(set, mothershipVector.elementAt(i).size()); for (int j = 0; j < mothershipVector.elementAt(i).size(); j++) { Instance instanceToAdd = set.instance((Integer) mothershipVector.elementAt(i).elementAt(j)); partitions[i].add(instanceToAdd); } } /* * up to here, the 
partitions array has been formed. it contains the split dataset by label combinations * it holds both the attributes and the labels, but for clustering the input should only be the attributes, * so we need to delete the labels. this is taken care of by initializePopulation() */ return partitions; }