List of usage examples for weka.core.Instance.weight()
public double weight();
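weight() returns the instance's current weight (1.0 by default); setWeight() changes it. Before the project examples below, here is a minimal, hedged sketch of both calls. The ARFF file name and the rescaling factor are illustrative assumptions, not taken from any of the source files:

    import java.io.BufferedReader;
    import java.io.FileReader;

    import weka.core.Instance;
    import weka.core.Instances;

    public class WeightDemo {

        public static void main(String[] args) throws Exception {
            // Hypothetical ARFF file; any dataset works here.
            Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
            data.setClassIndex(data.numAttributes() - 1);

            // Instances carry weight 1.0 unless it has been set explicitly.
            double sum = 0;
            for (int i = 0; i < data.numInstances(); i++) {
                sum += data.instance(i).weight();
            }
            System.out.println("Sum of weights: " + sum);

            // Double the weight of the first instance, e.g. to emphasize it
            // when training a weight-aware classifier.
            Instance first = data.instance(0);
            first.setWeight(first.weight() * 2.0);
        }
    }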
From source file:j48.Distribution.java
License:Open Source License
    /**
     * Shifts given instance from one bag to another one.
     *
     * @exception Exception if something goes wrong
     */
    public final void shift(int from, int to, Instance instance) throws Exception {

        int classIndex;
        double weight;

        classIndex = (int) instance.classValue();
        weight = instance.weight();
        m_perClassPerBag[from][classIndex] -= weight;
        m_perClassPerBag[to][classIndex] += weight;
        m_perBag[from] -= weight;
        m_perBag[to] += weight;
    }
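shift() does nothing but move an instance's weight between weighted per-bag and per-class counts. A stripped-down sketch of that bookkeeping, independent of the package-private j48.Distribution class (MiniDistribution is a hypothetical stand-in whose fields mirror the ones used above):

    import weka.core.Instance;

    // Minimal weighted-count bookkeeper mirroring j48.Distribution's fields.
    class MiniDistribution {
        private final double[][] perClassPerBag; // [bag][class] weighted counts
        private final double[] perBag;           // [bag] weighted totals

        MiniDistribution(int numBags, int numClasses) {
            perClassPerBag = new double[numBags][numClasses];
            perBag = new double[numBags];
        }

        // Move an instance's weight from one bag to another,
        // exactly as Distribution.shift() does above.
        void shift(int from, int to, Instance instance) {
            int classIndex = (int) instance.classValue();
            double weight = instance.weight();
            perClassPerBag[from][classIndex] -= weight;
            perClassPerBag[to][classIndex] += weight;
            perBag[from] -= weight;
            perBag[to] += weight;
        }
    }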
From source file:j48.Distribution.java
License:Open Source License
    /**
     * Shifts all instances in given range from one bag to another one.
     *
     * @exception Exception if something goes wrong
     */
    public final void shiftRange(int from, int to, Instances source, int startIndex,
            int lastPlusOne) throws Exception {

        int classIndex;
        double weight;
        Instance instance;
        int i;

        for (i = startIndex; i < lastPlusOne; i++) {
            instance = (Instance) source.instance(i);
            classIndex = (int) instance.classValue();
            weight = instance.weight();
            m_perClassPerBag[from][classIndex] -= weight;
            m_perClassPerBag[to][classIndex] += weight;
            m_perBag[from] -= weight;
            m_perBag[to] += weight;
        }
    }
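shiftRange() applies the same per-instance update over a contiguous index range; modulo the inlined field updates, it is equivalent to calling shift() in a loop, which the sketch below makes explicit (it assumes the hypothetical MiniDistribution from the previous sketch):

    // Equivalent formulation of shiftRange() as repeated shift() calls,
    // added to the hypothetical MiniDistribution above.
    void shiftRange(int from, int to, weka.core.Instances source,
            int startIndex, int lastPlusOne) {
        for (int i = startIndex; i < lastPlusOne; i++) {
            shift(from, to, source.instance(i));
        }
    }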
From source file:j48.GraftSplit.java
License:Open Source License
    /**
     * builds m_graftdistro using the passed data
     *
     * @param data the instances to use when creating the distribution
     */
    public void buildClassifier(Instances data) throws Exception {

        // distribution for the graft, not counting cases in atbop, only orig leaf
        m_graftdistro = new Distribution(2, data.numClasses());

        // which subset are we looking at for the graft?
        int subset = subsetOfInterest(); // this is the subset for m_leaf

        double thisNodeCount = 0;
        double knownCases = 0;
        boolean allKnown = true;

        // populate distribution
        for (int x = 0; x < data.numInstances(); x++) {
            Instance instance = data.instance(x);
            if (instance.isMissing(m_attIndex)) {
                allKnown = false;
                continue;
            }
            knownCases += instance.weight();
            int subst = whichSubset(instance);
            if (subst == -1)
                continue;
            m_graftdistro.add(subst, instance);
            if (subst == subset) { // instance belongs at m_leaf
                thisNodeCount += instance.weight();
            }
        }

        double factor = (knownCases == 0) ? 0.5 : (thisNodeCount / knownCases);

        if (!allKnown) {
            for (int x = 0; x < data.numInstances(); x++) {
                if (data.instance(x).isMissing(m_attIndex)) {
                    Instance instance = data.instance(x);
                    int subst = whichSubset(instance);
                    if (subst == -1)
                        continue;
                    instance.setWeight(instance.weight() * factor);
                    m_graftdistro.add(subst, instance);
                }
            }
        }

        // if there are no cases at the leaf, make sure the desired
        // class is chosen, by setting counts to 0.01
        if (m_graftdistro.perBag(subset) == 0) {
            double[] counts = new double[data.numClasses()];
            counts[m_maxClass] = 0.01;
            m_graftdistro.add(subset, counts);
        }
        if (m_graftdistro.perBag((subset == 0) ? 1 : 0) == 0) {
            double[] counts = new double[data.numClasses()];
            counts[(int) m_otherLeafMaxClass] = 0.01;
            m_graftdistro.add((subset == 0) ? 1 : 0, counts);
        }
    }
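The key weight() idiom here is proportional down-weighting: an instance with a missing split attribute is counted with its weight scaled by the fraction of known cases that reached the node. The idiom in isolation, as a hedged sketch (attIndex, thisNodeCount, and knownCases are assumptions standing in for the GraftSplit fields above):

    import weka.core.Instance;
    import weka.core.Instances;

    // Hypothetical helper: scale the weight of every instance whose split
    // attribute is missing by the known-case fraction, as GraftSplit does.
    public class MissingValueWeighting {

        static void reweighMissing(Instances data, int attIndex,
                                   double thisNodeCount, double knownCases) {
            double factor = (knownCases == 0) ? 0.5 : (thisNodeCount / knownCases);
            for (int x = 0; x < data.numInstances(); x++) {
                Instance instance = data.instance(x);
                if (instance.isMissing(attIndex)) {
                    instance.setWeight(instance.weight() * factor);
                }
            }
        }
    }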
From source file:j48.NBTreeSplit.java
License:Open Source License
    /**
     * Creates split on enumerated attribute.
     *
     * @exception Exception if something goes wrong
     */
    private void handleEnumeratedAttribute(Instances trainInstances) throws Exception {

        m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
        m_c45S.buildClassifier(trainInstances);
        if (m_c45S.numSubsets() == 0) {
            return;
        }
        m_errors = 0;

        Instance instance;
        Instances[] trainingSets = new Instances[m_complexityIndex];
        for (int i = 0; i < m_complexityIndex; i++) {
            trainingSets[i] = new Instances(trainInstances, 0);
        }
        /*
         * m_distribution = new Distribution(m_complexityIndex,
         *     trainInstances.numClasses());
         */
        int subset;
        for (int i = 0; i < trainInstances.numInstances(); i++) {
            instance = trainInstances.instance(i);
            subset = m_c45S.whichSubset(instance);
            if (subset > -1) {
                trainingSets[subset].add((Instance) instance.copy());
            } else {
                double[] weights = m_c45S.weights(instance);
                for (int j = 0; j < m_complexityIndex; j++) {
                    try {
                        Instance temp = (Instance) instance.copy();
                        if (weights.length == m_complexityIndex) {
                            temp.setWeight(temp.weight() * weights[j]);
                        } else {
                            temp.setWeight(temp.weight() / m_complexityIndex);
                        }
                        trainingSets[j].add(temp);
                    } catch (Exception ex) {
                        ex.printStackTrace();
                        System.err.println("*** " + m_complexityIndex);
                        System.err.println(weights.length);
                        System.exit(1);
                    }
                }
            }
        }

        /*
         * // compute weights (weights of instances per subset)
         * m_weights = new double[m_complexityIndex];
         * for (int i = 0; i < m_complexityIndex; i++) {
         *     m_weights[i] = trainingSets[i].sumOfWeights();
         * }
         * Utils.normalize(m_weights);
         */

        /*
         * // Only Instances with known values are relevant.
         * Enumeration enu = trainInstances.enumerateInstances();
         * while (enu.hasMoreElements()) {
         *     instance = (Instance) enu.nextElement();
         *     if (!instance.isMissing(m_attIndex)) {
         *         // m_distribution.add((int) instance.value(m_attIndex), instance);
         *         trainingSets[(int) instances.value(m_attIndex)].add(instance);
         *     } else {
         *         // add these to the error count
         *         m_errors += instance.weight();
         *     }
         * }
         */

        Random r = new Random(1);
        int minNumCount = 0;
        for (int i = 0; i < m_complexityIndex; i++) {
            if (trainingSets[i].numInstances() >= 5) {
                minNumCount++;
                // Discretize the sets
                Discretize disc = new Discretize();
                disc.setInputFormat(trainingSets[i]);
                trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

                trainingSets[i].randomize(r);
                trainingSets[i].stratify(5);
                NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
                fullModel.buildClassifier(trainingSets[i]);

                // add the errors for this branch of the split
                m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
            } else {
                // if fewer than min obj then just count them as errors
                for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                    m_errors += trainingSets[i].instance(j).weight();
                }
            }
        }

        // Check if there are at least five instances in at least two of the
        // subsets.
        if (minNumCount > 1) {
            m_numSubsets = m_complexityIndex;
        }
    }
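The weight() idiom here is fractional splitting: an instance whose split attribute is missing is copied into every branch with its weight scaled by that branch's share. The idiom in isolation, as a hedged sketch (branchWeights is assumed to be normalized, as C45Split.weights() returns; FractionalSplit is a hypothetical helper, not part of NBTreeSplit):

    import weka.core.Instance;
    import weka.core.Instances;

    // Distribute an instance with a missing split value across all branches,
    // scaling each copy's weight by the branch's proportion.
    public class FractionalSplit {

        static void addFractional(Instance instance, Instances[] branches,
                                  double[] branchWeights) {
            for (int j = 0; j < branches.length; j++) {
                Instance temp = (Instance) instance.copy();
                temp.setWeight(temp.weight() * branchWeights[j]);
                branches[j].add(temp);
            }
        }
    }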
From source file:j48.NBTreeSplit.java
License:Open Source License
    /**
     * Creates split on numeric attribute.
     *
     * @exception Exception if something goes wrong
     */
    private void handleNumericAttribute(Instances trainInstances) throws Exception {

        m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
        m_c45S.buildClassifier(trainInstances);
        if (m_c45S.numSubsets() == 0) {
            return;
        }
        m_errors = 0;

        Instances[] trainingSets = new Instances[m_complexityIndex];
        trainingSets[0] = new Instances(trainInstances, 0);
        trainingSets[1] = new Instances(trainInstances, 0);
        int subset = -1;

        // populate the subsets
        for (int i = 0; i < trainInstances.numInstances(); i++) {
            Instance instance = trainInstances.instance(i);
            subset = m_c45S.whichSubset(instance);
            if (subset != -1) {
                trainingSets[subset].add((Instance) instance.copy());
            } else {
                double[] weights = m_c45S.weights(instance);
                for (int j = 0; j < m_complexityIndex; j++) {
                    Instance temp = (Instance) instance.copy();
                    if (weights.length == m_complexityIndex) {
                        temp.setWeight(temp.weight() * weights[j]);
                    } else {
                        temp.setWeight(temp.weight() / m_complexityIndex);
                    }
                    trainingSets[j].add(temp);
                }
            }
        }

        /*
         * // compute weights (weights of instances per subset)
         * m_weights = new double[m_complexityIndex];
         * for (int i = 0; i < m_complexityIndex; i++) {
         *     m_weights[i] = trainingSets[i].sumOfWeights();
         * }
         * Utils.normalize(m_weights);
         */

        Random r = new Random(1);
        int minNumCount = 0;
        for (int i = 0; i < m_complexityIndex; i++) {
            if (trainingSets[i].numInstances() > 5) {
                minNumCount++;
                // Discretize the sets
                Discretize disc = new Discretize();
                disc.setInputFormat(trainingSets[i]);
                trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

                trainingSets[i].randomize(r);
                trainingSets[i].stratify(5);
                NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
                fullModel.buildClassifier(trainingSets[i]);

                // add the errors for this branch of the split
                m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
            } else {
                for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                    m_errors += trainingSets[i].instance(j).weight();
                }
            }
        }

        // Check if there is a minimum number of Instances in at least two
        // subsets.
        if (minNumCount > 1) {
            m_numSubsets = m_complexityIndex;
        }
    }
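Both NBTreeSplit methods also use weight() for error counting: a branch too small to cross-validate contributes the full weight of every instance in it to m_errors. That accumulation in isolation, as a hedged sketch (BranchErrors is a hypothetical helper, not part of NBTreeSplit):

    import weka.core.Instances;

    // Weighted error count for a branch too small to cross-validate:
    // each instance contributes its full weight to the error total.
    public class BranchErrors {

        static double weightedCount(Instances branch) {
            double errors = 0;
            for (int j = 0; j < branch.numInstances(); j++) {
                errors += branch.instance(j).weight();
            }
            return errors;
        }
    }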
From source file:kea.KEAFilter.java
License:Open Source License
    /**
     * Builds the classifier.
     */
    private void buildClassifier() throws Exception {

        // Generate input format for classifier
        FastVector atts = new FastVector();
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (i == m_DocumentAtt) {
                atts.addElement(new Attribute("TFxIDF"));
                atts.addElement(new Attribute("First_occurrence"));
                if (m_KFused) {
                    atts.addElement(new Attribute("Keyphrase_frequency"));
                }
            } else if (i == m_KeyphrasesAtt) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            }
        }
        m_ClassifierData = new Instances("ClassifierData", atts, 0);
        m_ClassifierData.setClassIndex(m_NumFeatures);

        if (m_Debug) {
            System.err.println("--- Converting instances for classifier");
        }

        // Convert pending input instances into data for classifier
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance current = getInputFormat().instance(i);

            // Get the key phrases for the document
            String keyphrases = current.stringValue(m_KeyphrasesAtt);
            HashMap hashKeyphrases = getGivenKeyphrases(keyphrases, false);
            HashMap hashKeysEval = getGivenKeyphrases(keyphrases, true);

            // Get the phrases for the document
            HashMap hash = new HashMap();
            int length = getPhrases(hash, current.stringValue(m_DocumentAtt));

            // Compute the feature values for each phrase and
            // add the instance to the data for the classifier
            Iterator it = hash.keySet().iterator();
            while (it.hasNext()) {
                String phrase = (String) it.next();
                FastVector phraseInfo = (FastVector) hash.get(phrase);
                double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length);
                Instance inst = new Instance(current.weight(), vals);
                m_ClassifierData.add(inst);
            }
        }

        if (m_Debug) {
            System.err.println("--- Building classifier");
        }

        // Build classifier
        FilteredClassifier fclass = new FilteredClassifier();
        fclass.setClassifier(new NaiveBayesSimple());
        fclass.setFilter(new Discretize());
        m_Classifier = fclass;
        m_Classifier.buildClassifier(m_ClassifierData);

        if (m_Debug) {
            System.err.println(m_Classifier);
        }

        // Save space
        m_ClassifierData = new Instances(m_ClassifierData, 0);
    }
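Note that every phrase instance is created with the source document's weight, so a weighted document yields proportionally weighted training data. That idiom in isolation, as a hedged sketch ('header' is a hypothetical stand-in for m_ClassifierData, and the two-argument constructor is the pre-3.6 weka.core.Instance API used throughout these sources; newer Weka would use DenseInstance):

    import weka.core.Instance;
    import weka.core.Instances;

    // Create a derived instance that inherits its parent's weight.
    public class DerivedInstance {

        static Instance derive(Instance document, double[] featureValues,
                               Instances header) {
            Instance inst = new Instance(document.weight(), featureValues);
            inst.setDataset(header);
            return inst;
        }
    }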
From source file:kea.KEAFilter.java
License:Open Source License
    /**
     * Converts an instance.
     */
    private FastVector convertInstance(Instance instance, boolean training) throws Exception {

        FastVector vector = new FastVector();

        if (m_Debug) {
            System.err.println("-- Converting instance");
        }

        // Get the key phrases for the document
        HashMap hashKeyphrases = null;
        HashMap hashKeysEval = null;
        if (!instance.isMissing(m_KeyphrasesAtt)) {
            String keyphrases = instance.stringValue(m_KeyphrasesAtt);
            hashKeyphrases = getGivenKeyphrases(keyphrases, false);
            hashKeysEval = getGivenKeyphrases(keyphrases, true);
        }

        // Get the phrases for the document
        HashMap hash = new HashMap();
        int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));

        // Compute number of extra attributes
        int numFeatures = 5;
        if (m_Debug) {
            if (m_KFused) {
                numFeatures = numFeatures + 1;
            }
        }

        // Set indices of key attributes
        int phraseAttIndex = m_DocumentAtt;
        int tfidfAttIndex = m_DocumentAtt + 2;
        int distAttIndex = m_DocumentAtt + 3;
        int probsAttIndex = m_DocumentAtt + numFeatures - 1;

        // Go through the phrases and convert them into instances
        Iterator it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = (String) it.next();
            FastVector phraseInfo = (FastVector) hash.get(phrase);
            double[] vals = featVals(phrase, phraseInfo, training, hashKeysEval, hashKeyphrases, length);
            Instance inst = new Instance(instance.weight(), vals);
            inst.setDataset(m_ClassifierData);

            // Get probability of phrase being key phrase
            double[] probs = m_Classifier.distributionForInstance(inst);
            double prob = probs[1];

            // Compute attribute values for final instance
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {

                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = index;

                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue((String) phraseInfo.elementAt(2));
                    newInst[pos++] = index;

                    // Add TFxIDF
                    newInst[pos++] = inst.value(m_TfidfIndex);

                    // Add distance
                    newInst[pos++] = inst.value(m_FirstOccurIndex);

                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = inst.value(m_KeyFreqIndex);
                        }
                    }

                    // Add probability
                    probsAttIndex = pos;
                    newInst[pos++] = prob;

                    // Set rank to missing (computed below)
                    newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = inst.classValue();
                } else {
                    newInst[pos++] = instance.value(i);
                }
            }
            Instance ins = new Instance(instance.weight(), newInst);
            ins.setDataset(outputFormatPeek());
            vector.addElement(ins);
        }

        // Add dummy instances for keyphrases that don't occur
        // in the document
        if (hashKeysEval != null) {
            Iterator phrases = hashKeysEval.keySet().iterator();
            while (phrases.hasNext()) {
                String phrase = (String) phrases.next();
                double[] newInst = new double[instance.numAttributes() + numFeatures];
                int pos = 0;
                for (int i = 0; i < instance.numAttributes(); i++) {
                    if (i == m_DocumentAtt) {

                        // Add phrase
                        int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                        newInst[pos++] = (double) index;

                        // Add original version
                        index = outputFormatPeek().attribute(pos).addStringValue((String) hashKeysEval.get(phrase));
                        newInst[pos++] = (double) index;

                        // Add TFxIDF
                        newInst[pos++] = Instance.missingValue();

                        // Add distance
                        newInst[pos++] = Instance.missingValue();

                        // Add other features
                        if (m_Debug) {
                            if (m_KFused) {
                                newInst[pos++] = Instance.missingValue();
                            }
                        }

                        // Add probability and rank
                        newInst[pos++] = -Double.MAX_VALUE;
                        newInst[pos++] = Instance.missingValue();
                    } else if (i == m_KeyphrasesAtt) {
                        newInst[pos++] = 1; // Keyphrase
                    } else {
                        newInst[pos++] = instance.value(i);
                    }
                }
                Instance inst = new Instance(instance.weight(), newInst);
                inst.setDataset(outputFormatPeek());
                vector.addElement(inst);
            }
        }

        // Sort phrases according to their distance (stable sort)
        double[] vals = new double[vector.size()];
        for (int i = 0; i < vals.length; i++) {
            vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
        }
        FastVector newVector = new FastVector(vector.size());
        int[] sortedIndices = Utils.stableSort(vals);
        for (int i = 0; i < vals.length; i++) {
            newVector.addElement(vector.elementAt(sortedIndices[i]));
        }
        vector = newVector;

        // Sort phrases according to their tfxidf value (stable sort)
        for (int i = 0; i < vals.length; i++) {
            vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
        }
        newVector = new FastVector(vector.size());
        sortedIndices = Utils.stableSort(vals);
        for (int i = 0; i < vals.length; i++) {
            newVector.addElement(vector.elementAt(sortedIndices[i]));
        }
        vector = newVector;

        // Sort phrases according to their probability (stable sort)
        for (int i = 0; i < vals.length; i++) {
            vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
        }
        newVector = new FastVector(vector.size());
        sortedIndices = Utils.stableSort(vals);
        for (int i = 0; i < vals.length; i++) {
            newVector.addElement(vector.elementAt(sortedIndices[i]));
        }
        vector = newVector;

        // Compute rank of phrases. Check for subphrases that are ranked
        // lower than superphrases and assign probability -1 and set the
        // rank to Integer.MAX_VALUE
        int rank = 1;
        for (int i = 0; i < vals.length; i++) {
            Instance currentInstance = (Instance) vector.elementAt(i);

            // Short cut: if phrase very unlikely make rank very low and continue
            if (Utils.grOrEq(vals[i], 1.0)) {
                currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
                continue;
            }

            // Otherwise look for super phrase starting with first phrase
            // in list that has same probability, TFxIDF value, and distance as
            // current phrase. We do this to catch all superphrases
            // that have same probability, TFxIDF value and distance as current phrase.
            int startInd = i;
            while (startInd < vals.length) {
                Instance inst = (Instance) vector.elementAt(startInd);
                if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                        || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                        || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                    break;
                }
                startInd++;
            }
            String val = currentInstance.stringValue(phraseAttIndex);
            boolean foundSuperphrase = false;
            for (int j = startInd - 1; j >= 0; j--) {
                if (j != i) {
                    Instance candidate = (Instance) vector.elementAt(j);
                    String potSuperphrase = candidate.stringValue(phraseAttIndex);
                    if (val.length() <= potSuperphrase.length()) {
                        if (KEAFilter.contains(val, potSuperphrase)) {
                            foundSuperphrase = true;
                            break;
                        }
                    }
                }
            }
            if (foundSuperphrase) {
                currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            } else {
                currentInstance.setValue(probsAttIndex + 1, rank++);
            }
        }
        return vector;
    }
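The three sort passes above share one pattern: extract a sort key per instance, stable-sort the keys with weka.core.Utils.stableSort(), and rebuild the vector in key order. The pattern in isolation, as a hedged sketch using the same old-API types (sortByAttribute is a hypothetical helper, not part of KEAFilter):

    import weka.core.FastVector;
    import weka.core.Instance;
    import weka.core.Utils;

    // Stable-sort a vector of instances by one attribute's value.
    public class StableSortByAttribute {

        static FastVector sortByAttribute(FastVector vector, int attIndex) {
            double[] vals = new double[vector.size()];
            for (int i = 0; i < vals.length; i++) {
                vals[i] = ((Instance) vector.elementAt(i)).value(attIndex);
            }
            FastVector sorted = new FastVector(vector.size());
            int[] indices = Utils.stableSort(vals);
            for (int i = 0; i < vals.length; i++) {
                sorted.addElement(vector.elementAt(indices[i]));
            }
            return sorted;
        }
    }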
From source file:kea.KEAPhraseFilter.java
License:Open Source License
    /**
     * Converts an instance by removing all non-alphanumeric characters
     * from its string attribute values.
     */
    private void convertInstance(Instance instance) throws Exception {

        double[] instVals = new double[instance.numAttributes()];

        for (int i = 0; i < instance.numAttributes(); i++) {
            if (!instance.attribute(i).isString() || instance.isMissing(i)) {
                instVals[i] = instance.value(i);
            } else {
                if (!m_SelectCols.isInRange(i)) {
                    int index = getOutputFormat().attribute(i).addStringValue(instance.stringValue(i));
                    instVals[i] = (double) index;
                    continue;
                }
                String str = instance.stringValue(i);

                StringBuffer resultStr = new StringBuffer();
                int j = 0;
                boolean phraseStart = true;
                boolean seenNewLine = false;
                boolean haveSeenHyphen = false;
                boolean haveSeenSlash = false;
                while (j < str.length()) {
                    boolean isWord = false;
                    boolean potNumber = false;
                    int startj = j;
                    while (j < str.length()) {
                        char ch = str.charAt(j);
                        if (Character.isLetterOrDigit(ch)) {
                            potNumber = true;
                            if (Character.isLetter(ch)) {
                                isWord = true;
                            }
                            j++;
                        } else if ((!m_DisallowInternalPeriods && (ch == '.')) || (ch == '@') || (ch == '_')
                                || (ch == '&') || (ch == '/') || (ch == '-')) {
                            if ((j > 0) && (j + 1 < str.length()) && Character.isLetterOrDigit(str.charAt(j - 1))
                                    && Character.isLetterOrDigit(str.charAt(j + 1))) {
                                j++;
                            } else {
                                break;
                            }
                        } else if (ch == '\'') {
                            if ((j > 0) && Character.isLetterOrDigit(str.charAt(j - 1))) {
                                j++;
                            } else {
                                break;
                            }
                        } else {
                            break;
                        }
                    }
                    if (isWord == true) {
                        if (!phraseStart) {
                            if (haveSeenHyphen) {
                                resultStr.append('-');
                            } else if (haveSeenSlash) {
                                resultStr.append('/');
                            } else {
                                resultStr.append(' ');
                            }
                        }
                        resultStr.append(str.substring(startj, j));
                        if (j == str.length()) {
                            break;
                        }
                        phraseStart = false;
                        seenNewLine = false;
                        haveSeenHyphen = false;
                        haveSeenSlash = false;
                        if (Character.isWhitespace(str.charAt(j))) {
                            if (str.charAt(j) == '\n') {
                                seenNewLine = true;
                            }
                        } else if (str.charAt(j) == '-') {
                            haveSeenHyphen = true;
                        } else if (str.charAt(j) == '/') {
                            haveSeenSlash = true;
                        } else {
                            phraseStart = true;
                            resultStr.append('\n');
                        }
                        j++;
                    } else if (j == str.length()) {
                        break;
                    } else if (str.charAt(j) == '\n') {
                        if (seenNewLine) {
                            if (phraseStart == false) {
                                resultStr.append('\n');
                                phraseStart = true;
                            }
                        } else if (potNumber) {
                            if (phraseStart == false) {
                                phraseStart = true;
                                resultStr.append('\n');
                            }
                        }
                        seenNewLine = true;
                        j++;
                    } else if (Character.isWhitespace(str.charAt(j))) {
                        if (potNumber) {
                            if (phraseStart == false) {
                                phraseStart = true;
                                resultStr.append('\n');
                            }
                        }
                        j++;
                    } else {
                        if (phraseStart == false) {
                            resultStr.append('\n');
                            phraseStart = true;
                        }
                        j++;
                    }
                }
                int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
                instVals[i] = (double) index;
            }
        }
        Instance inst = new Instance(instance.weight(), instVals);
        inst.setDataset(getOutputFormat());
        push(inst);
    }
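Beyond the string cleanup, both KEA filters end the same way: string values are re-registered in the output format's string pool as indices, everything else is copied, and the result keeps the input instance's weight. A condensed, hedged sketch of that conversion shell ('outputFormat' is a hypothetical stand-in for getOutputFormat(); the two-argument constructor is the pre-3.6 API):

    import weka.core.Attribute;
    import weka.core.Instance;
    import weka.core.Instances;

    // Weight-preserving instance conversion shared by the KEA filters above.
    public class ConvertShell {

        static Instance convert(Instance instance, Instances outputFormat) {
            double[] instVals = new double[instance.numAttributes()];
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (instance.attribute(i).isString() && !instance.isMissing(i)) {
                    // Register the string in the output pool; store its index.
                    Attribute outAtt = outputFormat.attribute(i);
                    instVals[i] = (double) outAtt.addStringValue(instance.stringValue(i));
                } else {
                    instVals[i] = instance.value(i);
                }
            }
            Instance inst = new Instance(instance.weight(), instVals);
            inst.setDataset(outputFormat);
            return inst;
        }
    }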
From source file:kea.NumbersFilter.java
License:Open Source License
    /**
     * Converts an instance. A phrase boundary is inserted where
     * a number is found.
     */
    private void convertInstance(Instance instance) throws Exception {

        double[] instVals = new double[instance.numAttributes()];

        for (int i = 0; i < instance.numAttributes(); i++) {
            if ((!instance.attribute(i).isString()) || instance.isMissing(i)) {
                instVals[i] = instance.value(i);
            } else {
                String str = instance.stringValue(i);

                StringBuffer resultStr = new StringBuffer();
                StringTokenizer tok = new StringTokenizer(str, " \t\n", true);
                while (tok.hasMoreTokens()) {
                    String token = tok.nextToken();

                    // Everything that doesn't contain at least
                    // one letter is considered to be a number
                    boolean isNumber = true;
                    for (int j = 0; j < token.length(); j++) {
                        if (Character.isLetter(token.charAt(j))) {
                            isNumber = false;
                            break;
                        }
                    }
                    if (!isNumber) {
                        resultStr.append(token);
                    } else {
                        if (token.equals(" ") || token.equals("\t") || token.equals("\n")) {
                            resultStr.append(token);
                        } else {
                            resultStr.append(" \n ");
                        }
                    }
                }
                int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
                instVals[i] = (double) index;
            }
        }
        Instance inst = new Instance(instance.weight(), instVals);
        inst.setDataset(getOutputFormat());
        push(inst);
    }
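Because these filters always construct the output instance with the input's weight, the dataset's total weight survives filtering. A quick, hedged check of that property with Instances.sumOfWeights() (the ARFF path is a placeholder, and the filter package path assumes a 3.5+ Weka, while the KEA sources above use the older flat filter package):

    import java.io.BufferedReader;
    import java.io.FileReader;

    import weka.core.Instances;
    import weka.filters.Filter;
    import weka.filters.unsupervised.attribute.Discretize;

    public class FilterWeightCheck {

        public static void main(String[] args) throws Exception {
            Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));

            Discretize disc = new Discretize();
            disc.setInputFormat(data);
            Instances filtered = Filter.useFilter(data, disc);

            // Both totals should match if the filter preserves weights.
            System.out.println("before: " + data.sumOfWeights());
            System.out.println("after:  " + filtered.sumOfWeights());
        }
    }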
From source file:lascer.WekaClassifier.java
License:Open Source License
    /**
     * Generates the classifier.
     *
     * @param data the data to be used.
     *
     * @exception Exception if the classifier can't be built successfully.
     */
    public void buildClassifier(Instances data) throws Exception {
        weka.coreExtended.Instances extendedInstances;
        weka.coreExtended.BasicInstance extInst;
        weka.coreExtended.BasicAttribute classAttribut;
        de.unistuttgart.commandline.Option formelnArtOption;
        de.unistuttgart.commandline.Option formelnKlasseOption;
        de.unistuttgart.commandline.Option loggingSwitch;
        Instance readInst;
        Beispieldaten invDatensatz;
        StringReader stringReader;
        Enumeration instEnum;
        Enumeration attribEnum;
        PraedErzParameter praedErzParameter = null;
        KonzErzParameter konzErzParameter = null;
        Pruning pruning;
        String formelArt;
        String formelKlasse;
        String optionWert;
        float posPruneAnt, negPruneAnt;
        int instNumber;
        boolean unbekannteWertBsp;

        Steuerung.parseArguments(parser);

        formelArt = Konstanten.WEKA_FORMEL_ART;
        formelnArtOption = parser.getOption("formelArt");
        if (parser.isEnabled(formelnArtOption)) {
            optionWert = parser.getParameter(formelnArtOption);
            if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) {
                System.err.println("Invalid value for option formelArt");
                System.err.println("Allowed: " + formelnArtOption.toString());
                throw (new RuntimeException("Invalid option value."));
            }
            formelArt = optionWert;
        }

        formelKlasse = Konstanten.WEKA_FORMEL_KLASSE;
        formelnKlasseOption = parser.getOption("formelKlasse");
        if (parser.isEnabled(formelnKlasseOption)) {
            optionWert = parser.getParameter(formelnKlasseOption);
            if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste")
                    && !optionWert.equals("beide")) {
                System.err.println("Invalid value for option formelKlasse");
                System.err.println("Allowed: " + formelnKlasseOption.toString());
                throw (new RuntimeException("Invalid option value."));
            }
            formelKlasse = optionWert;
        }

        loggingSwitch = parser.getOption("logging");
        if (debugMode || parser.isEnabled(loggingSwitch)) {
            Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL);
        }

        // Determine the parameters.
        unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser);
        posPruneAnt = Steuerung.posPruneAnteil(parser);
        negPruneAnt = Steuerung.negPruneAnteil(parser);
        praedErzParameter = Steuerung.praedErzParameter(parser);
        konzErzParameter = Steuerung.konzErzParameter(parser);

        // Read the data and create the Instances object.
        instNumber = data.numInstances();
        stringReader = new StringReader(data.toString());
        extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber);
        instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            readInst = (Instance) instEnum.nextElement();
            extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray());
            extendedInstances.addBasicInstance(extInst);
        }

        // Create the data sets.
        posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp);
        negDatensatz = posDatensatz.kopie(true);

        // Create the list of attributes.
        attributListe = new LinkedList();
        attribEnum = extendedInstances.enumerateBasicAttributes();
        while (attribEnum.hasMoreElements()) {
            attributListe.add(attribEnum.nextElement());
        }

        // Determine the values of the classification.
        classAttribut = extendedInstances.basicClassAttribute();
        wekaClassTrue = classAttribut.indexOfValue("true");
        wekaClassFalse = classAttribut.indexOfValue("false");

        // Generate the formula for the class of positive examples.
        if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
            posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }

        // Generate the formula for the class of negative examples.
        if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
            negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }

        if (formelKlasse.equals("beste")) {
            // Discard the worse formula.
            if (negFormel.istBesser(posFormel)) {
                posFormel = null;
            } else {
                negFormel = null;
            }
        }

        if ((posPruneAnt > 0) || (negPruneAnt > 0)) {
            pruning = new Pruning();

            if (posFormel != null) {
                posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt);
                posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
            }
            if (negFormel != null) {
                negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt);
                negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
            }
        }
    }