List of usage examples for weka.core.Instances.classIndex()
public int classIndex()
From source file:miRdup.WekaModule.java
License:Open Source License
public static void attributeSelection(File arff, String outfile) { // load data// w w w . j a va2 s . c om try { PrintWriter pw = new PrintWriter(new FileWriter(outfile)); DataSource source = new DataSource(arff.toString()); Instances data = source.getDataSet(); if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } AttributeSelection attrsel = new AttributeSelection(); weka.attributeSelection.InfoGainAttributeEval eval = new weka.attributeSelection.InfoGainAttributeEval(); weka.attributeSelection.Ranker rank = new weka.attributeSelection.Ranker(); rank.setOptions(weka.core.Utils.splitOptions("-T -1.7976931348623157E308 -N -1")); if (Main.debug) { System.out.print("Model options: " + rank.getClass().getName().trim() + " "); } for (String s : rank.getOptions()) { System.out.print(s + " "); } attrsel.setEvaluator(eval); attrsel.setSearch(rank); attrsel.setFolds(10); attrsel.SelectAttributes(data); //attrsel.CrossValidateAttributes(); System.out.println(attrsel.toResultsString()); pw.println(attrsel.toResultsString()); //evaluation.crossValidateModel(classifier, data, 10, new Random(1)); pw.flush(); pw.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/**
 * Initializes the correlation attribute evaluator: for every non-class
 * attribute a correlation score with the class is computed and stored in
 * {@code m_correlations} (indexed by attribute index). Works on a copy of
 * the supplied data. Replaces missing values with means/modes; deletes
 * instances with missing class values.
 *
 * <p>Nominal attributes are binarized (one 0/1 indicator vector per value)
 * and their per-value absolute correlations are averaged, weighted by how
 * often each value occurs. A nominal class is binarized the same way and the
 * per-class absolute correlations are averaged, weighted by class counts.
 * When {@code m_detailedOutput} is set, the per-value scores of nominal
 * attributes are appended to {@code m_detailedOutputBuff}.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // Copy first so the caller's data set is not modified by the clean-up below.
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    // Attribute indexes (class excluded) split by type.
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and 1 for corresponding instance weights for the 1's)
    // nomAtts[attribute][value][instance] holds the 0/1 indicator vectors;
    // entries for non-nominal attributes (and the class) stay null.
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            // set zero index for this att to all 1's
            Arrays.fill(nomAtts[i][0], 1.0);
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1).
                    // Adding 1 to the stored value's slot and subtracting 1 from
                    // slot 0 leaves slot 0 at 1 when the stored value is 0, and
                    // moves the 1 to the right slot otherwise.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // Number of instances taking value j (indicator vector sum).
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // Only the strength of the correlation matters, not its sign.
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // Frequency-weighted average over the attribute's values.
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        // binarizedClasses[classValue][instance] is the 0/1 indicator of the class.
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;
                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                // Class-count-weighted average of the per-class correlations.
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // Terminate the detailed report with a newline when anything was written.
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}
From source file:moa.classifiers.AbstractClassifier.java
License:Open Source License
/**
 * Translates an attribute index used inside the learner into the matching
 * attribute index of a set of instances.
 *
 * @param index the index of the attribute in the learner
 * @param insts the instances whose class index decides the shift
 * @return {@code index} when the class index of {@code insts} is greater
 *         than {@code index}, otherwise {@code index + 1}
 */
protected static int modelAttIndexToInstanceAttIndex(int index, Instances insts) {
    // Attributes located before the class attribute keep their position.
    if (insts.classIndex() > index) {
        return index;
    }
    // Everything else is shifted by one to step over the class attribute.
    return index + 1;
}
From source file:moa.classifiers.macros.TACNB.java
License:Open Source License
public void initHeader(Instances dataset) { int numLabels = this.numOldLabelsOption.getValue(); Attribute target = dataset.classAttribute(); List<String> possibleValues = new ArrayList<String>(); int n = target.numValues(); for (int i = 0; i < n; i++) { possibleValues.add(target.value(i)); }//from w w w . j a v a2 s .c o m ArrayList<Attribute> attrs = new ArrayList<Attribute>(numLabels + dataset.numAttributes()); for (int i = 0; i < numLabels; i++) { attrs.add(new Attribute(target.name() + "_" + i, possibleValues)); } for (int i = 0; i < dataset.numAttributes(); i++) { attrs.add((Attribute) dataset.attribute(i).copy()); } this.header = new Instances("extended_" + dataset.relationName(), attrs, 0); this.header.setClassIndex(numLabels + dataset.classIndex()); }
From source file:moa.classifiers.novelClass.AbstractNovelClassClassifier.java
License:Apache License
final public static Instances augmentInstances(Instances datum) { ArrayList<Attribute> attInfo = new ArrayList<>(datum.numAttributes()); for (int aIdx = 0; aIdx < datum.numAttributes(); aIdx++) { Attribute a = datum.attribute(aIdx).copy(datum.attribute(aIdx).name()); if ((aIdx == datum.classIndex()) && (a.indexOfValue(NOVEL_LABEL_STR) < 0)) { // only if we don't already have these List<String> values = new ArrayList<>(a.numValues() + 2); for (int i = 0; i < a.numValues(); ++i) { values.add(a.value(i));/* w w w . ja va2 s . co m*/ } values.add(OUTLIER_LABEL_STR); values.add(NOVEL_LABEL_STR); a = new Attribute(a.name(), values, a.getMetadata()); } attInfo.add(a); } String relationshipName = NOVEL_CLASS_INSTANCE_RELATIONSHIP_TYPE + "-" + datum.relationName(); Instances ret = new Instances(relationshipName, attInfo, 1); ret.setClassIndex(datum.classIndex()); return ret; }
From source file:moa.classifiers.rules.GeRules.java
License:Open Source License
public static void main(String[] args) throws Exception { // TODO Auto-generated method stub //ArffFileStream arffFileStream = new ArffFileStream("resources/UCI_KDD/nominal/cmc.arff", -1); // read arff file WEKA way DataSource source = new DataSource("data/cmc.arff"); // stream generator RandomTreeGenerator treeGenerator = new RandomTreeGenerator(); treeGenerator.numClassesOption.setValue(5); treeGenerator.numNumericsOption.setValue(0); treeGenerator.prepareForUse();//from w ww .ja v a 2s .c o m // HoeffdingRules classifier GeRules gErules = new GeRules(); gErules.prepareForUse(); // load data into instances set Instances data = source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); // Using Prism classifier //hoeffdingRules.learnRules(Collections.list(data.enumerateInstances())); for (Instance instance : Collections.list(data.enumerateInstances())) { gErules.trainOnInstanceImpl(instance); gErules.correctlyClassifies(instance); } Instance anInstance = Collections.list(data.enumerateInstances()).get(10); System.out.println(anInstance); for (Rule aRule : gErules.RulesCoveredInstance(anInstance)) { System.out.println(aRule.printRule()); } for (Rule aRule : gErules.rulesList) { System.out.println(aRule.printRule()); } }
From source file:moa.clusterers.AbstractClusterer.java
License:Open Source License
/**
 * Maps a learner-side attribute index to the corresponding attribute index
 * in the given instances.
 *
 * @param index the attribute index as used by the learner
 * @param insts the instances providing the class index
 * @return {@code index} if it is smaller than the class index of
 *         {@code insts}, otherwise {@code index + 1}
 */
protected static int modelAttIndexToInstanceAttIndex(int index, Instances insts) {
    // Indexes at or beyond the class position move up by one slot.
    final int offset = (index < insts.classIndex()) ? 0 : 1;
    return index + offset;
}
From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java
/** * Creates a copy of the instances and redefines the problem such that it is * now important to classify the wrongly classified instances *//*from w ww.jav a 2 s . c o m*/ private Instances redefineProblem(Instances data) { Instances redefInstances = new Instances(data); // deep copy of instance store // System.out.println(reDefinedClasses.attributeStats(reDefinedClasses.classIndex())); // System.out.println("Before filtering: "+wrongData.size()); double predictedClass = 0; int oldClassIndex = redefInstances.classIndex(); try { Iterator inst = redefInstances.iterator(); int num_instances = 0; int num_patch = 0; int num_base = 0; while (inst.hasNext()) { weka.core.Instance a = (weka.core.Instance) inst.next(); predictedClass = this.baseClassifier.classifyInstance(a); // Achtung: das hier muss "base" bleiben!! if (predictedClass == a.classValue()) { a.setClassValue(1); if (num_instances < batchSize.getValue()) { num_base++; } } else { a.setClassValue(0); if (num_instances < batchSize.getValue()) { num_patch++; } } num_instances++; } System.out.println("Patchklassifizierer: " + (float) num_patch / batchSize.getValue() * 100); System.out.println("Baselassifizierer: " + (float) num_base / batchSize.getValue() * 100); if (this.useBaseClassAsAttribute.isSet()) { redefInstances = addBaseClassToInstances(redefInstances); } redefInstances = changeClassToWrongRight(redefInstances); } catch (Exception e) { System.err.println("Error while classifying instance in redefineProblem"); System.err.println(e.getMessage()); System.err.println(e.fillInStackTrace()); System.exit(987654); } return redefInstances; }
From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java
/** * Modify the instances and insert into them the class which the base * classifier had them classified as./*www. ja v a2s .com*/ * * @return */ private Instances addBaseClassToInstances(Instances origInstances) { Instances moddedInstances = new Instances(origInstances); // deep copy double predictedClass = 0; // create new attribute try { moddedInstances = copyClassAttribute(moddedInstances, "baseLabel", 1); // das was hier attribute 1 ist, wird zu index 0 moddedInstances.setClassIndex(origInstances.classIndex() + 1); } catch (Exception e) { System.err.println("Error while copying class Attribute for baseLabel"); System.err.println(e.getMessage()); } Iterator inst = origInstances.iterator(); int index = 0; while (inst.hasNext()) { weka.core.Instance a = (weka.core.Instance) inst.next(); weka.core.Instance target = moddedInstances.instance(index); predictedClass = 0; try { predictedClass = this.baseClassifier.classifyInstance(a); // Achtung: das hier muss "base" bleiben!! } catch (Exception e) { System.err.println("Error while classifying instance in addBaseClassToInstances"); System.err.println(a); System.err.println(e.getMessage()); } target.setValue(0, predictedClass); // index 0 ist attribute 1 index++; } return moddedInstances; }
From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java
/** * Copies the class attribute to another position (first position) * * @param instances/*ww w .j a va 2 s.com*/ * @param newName * @param newAttributeIndex * @return * @throws Exception */ public static Instances copyClassAttribute(Instances instances, String newName, int newAttributeIndex) throws Exception { int whichAttribute = instances.classIndex(); Add filter = new Add(); filter.setAttributeIndex("" + newAttributeIndex); filter.setAttributeName(newName); // Copy nominal Attribute if (instances.attribute(whichAttribute).isNominal()) { String newNominalLabels = ""; Boolean first = true; Enumeration<Object> o = instances.attribute(whichAttribute).enumerateValues(); while (o.hasMoreElements()) { String s = (String) o.nextElement(); if (!first) { newNominalLabels += ","; } newNominalLabels += s; first = false; } filter.setNominalLabels(newNominalLabels); } filter.setInputFormat(instances); instances = Filter.useFilter(instances, filter); return instances; }