List of usage examples for weka.core Instance attribute
public Attribute attribute(int index);
From source file:affective.core.ArffLexiconWordLabeller.java
License:Open Source License
/** * Processes all the dictionary files.//www . j a va 2 s. com * @throws IOException an IOException will be raised if an invalid file is supplied */ public void processDict() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile)); Instances lexInstances = new Instances(reader); // set upper value for word index lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1); // checks all numeric and nominal attributes and discards the word attribute for (int i = 0; i < lexInstances.numAttributes(); i++) { if (i != this.lexiconWordIndex.getIndex()) { if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) { this.attributes.add(lexInstances.attribute(i)); } } } // Maps all words with their affective scores discarding missing values for (Instance inst : lexInstances) { if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) { String word = inst.stringValue(this.lexiconWordIndex.getIndex()); // stems the word word = this.m_stemmer.stem(word); // map numeric scores if (!attributes.isEmpty()) { Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>(); for (Attribute na : attributes) { wordVals.put(na, inst.value(na)); } this.attValMap.put(word, wordVals); } } } }
From source file:ann.MyANN.java
/** * mengubah Instance menjadi Data/* w ww.j a v a2s . c om*/ * @param instance Instance yang akan diubah menjadi kelas Data * @return kelas Data dari input */ private Data instanceToData(Instance instance) { ArrayList<Double> input = new ArrayList<>(); ArrayList<Double> target = new ArrayList<>(); for (int j = 0; j < instance.numAttributes() - 1; j++) { input.add(0.0); } if (instance.classAttribute().isNominal()) { for (int j = 0; j < instance.classAttribute().numValues(); j++) { target.add(0.0); } } else { target.add(0.0); } for (int j = 0; j < instance.numAttributes(); j++) { if (j == instance.classIndex()) { if (instance.attribute(j).isNominal()) target.set((int) instance.value(j), 1.0); else target.add(instance.value(j)); } else { input.set(j, instance.value(j)); } } return new Data(input, target); }
From source file:assign00.KNNClassifier.java
/**
 * Squared Euclidean distance over the shared non-class attributes of two
 * instances. Numeric attributes contribute the squared difference; any other
 * attribute type contributes 0 on a match and 1 on a mismatch.
 * The square root is never taken; the squared form preserves the neighbor
 * ordering needed for kNN.
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {
    // iterate only over attributes both instances have, skipping the last (class)
    int shared = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    double total = 0;
    for (int attr = 0; attr < shared; attr++) {
        boolean bothNumeric = instanceLHS.attribute(attr).isNumeric()
                && instanceRHS.attribute(attr).isNumeric();
        if (bothNumeric) {
            double diff = instanceLHS.value(attr) - instanceRHS.value(attr);
            total += diff * diff;
        } else if (!instanceLHS.stringValue(attr).equals(instanceRHS.stringValue(attr))) {
            // categorical mismatch penalty
            total += 1;
        }
    }
    return total;
}
From source file:assign00.KNNClassifier.java
/**
 * Manhattan (L1) distance over the shared non-class attributes of two
 * instances. Numeric attributes contribute the absolute difference; any other
 * attribute type contributes 0 on a match and 1 on a mismatch.
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {
    // iterate only over attributes both instances have, skipping the last (class)
    int shared = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    double total = 0;
    for (int attr = 0; attr < shared; attr++) {
        boolean bothNumeric = instanceLHS.attribute(attr).isNumeric()
                && instanceRHS.attribute(attr).isNumeric();
        if (bothNumeric) {
            total += Math.abs(instanceLHS.value(attr) - instanceRHS.value(attr));
        } else if (!instanceLHS.stringValue(attr).equals(instanceRHS.stringValue(attr))) {
            // categorical mismatch penalty
            total += 1;
        }
    }
    return total;
}
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/**
 * Returns the subset an instance falls into for this stump's split.
 * Subset 2 means the split attribute is missing; for nominal attributes
 * subset 0 means the value equals the split point, otherwise 1; for numeric
 * attributes subset 0 means the value is at most the split point, otherwise 1.
 *
 * @param instance the instance to check
 * @return the subset the instance falls into (0, 1, or 2)
 * @throws Exception if something goes wrong
 */
private int whichSubset(Instance instance) throws Exception {
    // missing split attribute gets its own branch
    if (instance.isMissing(m_AttIndex)) {
        return 2;
    }
    double val = instance.value(m_AttIndex);
    if (instance.attribute(m_AttIndex).isNominal()) {
        return ((int) val == m_SplitPoint) ? 0 : 1;
    }
    // numeric attribute: left branch when value <= split point
    return (val <= m_SplitPoint) ? 0 : 1;
}
From source file:br.puc_rio.ele.lvc.interimage.datamining.udf.BayesClassifier.java
License:Apache License
@Override public String exec(Tuple input) throws IOException { if (input == null) return null; if (_trainData == null) { //Reads train data try {/*from ww w. j av a2 s . c o m*/ if (!_trainUrl.isEmpty()) { URL url = new URL(_trainUrl); URLConnection urlConn = url.openConnection(); urlConn.connect(); InputStreamReader inStream = new InputStreamReader(urlConn.getInputStream()); BufferedReader buff = new BufferedReader(inStream); _trainData = _dataParser.parseData(buff); } } catch (Exception e) { throw new IOException("Caught exception reading training data file ", e); } } try { Integer numFeatures = input.size(); double[] testData; testData = new double[numFeatures]; for (int i = 0; i < numFeatures; i++) testData[i] = DataType.toDouble(input.get(i)); Classifier csfr = null; csfr = (Classifier) Class.forName("weka.classifiers.bayes.NaiveBayes").newInstance(); csfr.buildClassifier(_trainData); double classification = 0; Instance myinstance = _trainData.instance(0); for (int i = 0; i < numFeatures; i++) myinstance.setValue(i, testData[i]); classification = csfr.classifyInstance(myinstance); return myinstance.attribute(_trainData.classIndex()).value((int) classification); } catch (Exception e) { throw new IOException("Caught exception processing input row ", e); } }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
public double[] logDensityPerClusterForInstance(Instance inst) throws Exception { int i, j;/*w ww . ja v a 2 s. c om*/ double logprob; double[] wghts = new double[m_num_clusters]; m_replaceMissing.input(inst); inst = m_replaceMissing.output(); for (i = 0; i < m_num_clusters; i++) { // System.err.println("Cluster : "+i); logprob = 0.0; for (j = 0; j < m_num_attribs; j++) { if (!inst.isMissing(j)) { if (inst.attribute(j).isNominal()) { logprob += Math.log(m_model[i][j].getProbability(inst.value(j))); } else { // numeric attribute logprob += logNormalDens(inst.value(j), m_modelNormal[i][j][0], m_modelNormal[i][j][1]); /* * System.err.println(logNormalDens(inst.value(j), * m_modelNormal[i][j][0], m_modelNormal[i][j][1]) + * " "); */ } } } // System.err.println(""); wghts[i] = logprob; } return wghts; }
From source file:Classifiers.BRkNN.java
License:Open Source License
/**
 * Selects the best value for k by hold-one-out cross-validation, minimizing
 * Hamming Loss. Evaluates every k from cvMaxK down to 1 for each held-out
 * instance, then stores the best (lowest-loss, lowest-k on ties) value in
 * numOfNeighbors.
 *
 * @throws Exception potential exception thrown; to be handled at an upper level
 */
private void crossValidate() throws Exception {
    try {
        // the Hamming Loss accumulated for each candidate k (index k-1)
        double[] hammingLoss = new double[cvMaxK];
        for (int i = 0; i < cvMaxK; i++) {
            hammingLoss[i] = 0;
        }

        Instances dataSet = train;
        Instance instance; // the hold-out instance
        Instances neighbours; // the neighboring instances
        double[] origDistances, convertedDistances;
        for (int i = 0; i < dataSet.numInstances(); i++) {
            if (getDebug() && (i % 50 == 0)) {
                debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r");
            }
            instance = dataSet.instance(i);
            // fetch the cvMaxK nearest neighbours once; smaller k values are
            // obtained below by pruning this set
            neighbours = lnn.kNearestNeighbours(instance, cvMaxK);
            origDistances = lnn.getDistances();

            // gathering the true labels for the instance
            boolean[] trueLabels = new boolean[numLabels];
            for (int counter = 0; counter < numLabels; counter++) {
                int classIdx = labelIndices[counter];
                String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
                trueLabels[counter] = classValue.equals("1");
            }

            // calculate the performance metric for each different k,
            // from the largest down so the neighbour set can be pruned in place
            for (int j = cvMaxK; j > 0; j--) {
                // copy the distances: getConfidences mutates its array argument
                convertedDistances = new double[origDistances.length];
                System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length);
                double[] confidences = this.getConfidences(neighbours, convertedDistances);
                boolean[] bipartition = null;
                // derive the predicted label set per the configured extension
                switch (extension) {
                case NONE: // BRknn
                    MultiLabelOutput results;
                    results = new MultiLabelOutput(confidences, 0.5);
                    bipartition = results.getBipartition();
                    break;
                case EXTA: // BRknn-a
                    bipartition = labelsFromConfidences2(confidences);
                    break;
                case EXTB: // BRknn-b
                    bipartition = labelsFromConfidences3(confidences);
                    break;
                }
                double symmetricDifference = 0; // |Y xor Z|
                for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
                    boolean actual = trueLabels[labelIndex];
                    boolean predicted = bipartition[labelIndex];
                    if (predicted != actual) {
                        symmetricDifference++;
                    }
                }
                hammingLoss[j - 1] += (symmetricDifference / numLabels);

                // shrink the neighbour set to j-1 for the next iteration
                neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1);
            }
        }

        // Display the results of the cross-validation
        if (getDebug()) {
            for (int i = cvMaxK; i > 0; i--) {
                debug("Hold-one-out performance of " + (i) + " neighbors ");
                debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances());
            }
        }

        // Check through the performance stats and select the best
        // k value (or the lowest k if more than one best)
        double[] searchStats = hammingLoss;
        double bestPerformance = Double.NaN;
        int bestK = 1;
        for (int i = 0; i < cvMaxK; i++) {
            if (Double.isNaN(bestPerformance) || (bestPerformance > searchStats[i])) {
                bestPerformance = searchStats[i];
                bestK = i + 1;
            }
        }
        numOfNeighbors = bestK;
        if (getDebug()) {
            System.err.println("Selected k = " + bestK);
        }
    } catch (Exception ex) {
        // NOTE(review): the original cause is dropped here (only getMessage()
        // is kept), which loses the stack trace — consider chaining ex.
        throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage());
    }
}
From source file:Classifiers.BRkNN.java
License:Open Source License
/**
 * Calculates the confidences of the labels based on the neighboring
 * instances, using weight-based voting with a Laplace-style correction of
 * 1/|train| per label, then normalising to a distribution.
 *
 * NOTE(review): this method mutates the distances array in place (squares and
 * re-scales each entry); the caller crossValidate relies on the mutated
 * values when pruning the neighbour set.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors (modified in place)
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double total, weight;
    double neighborLabels = 0;
    double[] confidences = new double[numLabels];

    // Set up a correction to the estimator (Laplace-style smoothing)
    for (int i = 0; i < numLabels; i++) {
        confidences[i] = 1.0 / Math.max(1, train.numInstances());
    }
    total = (double) numLabels / Math.max(1, train.numInstances());

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        // re-scale the distance over the feature (non-label) attributes;
        // note this writes back into the caller's array
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels));
        weight = 1.0;
        weight *= current.weight();
        for (int j = 0; j < numLabels; j++) {
            // label attributes are nominal "0"/"1"; parse the value string
            double value = Double.parseDouble(
                    current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j])));
            if (Utils.eq(value, 1.0)) {
                confidences[j] += weight;
                neighborLabels += weight;
            }
        }
        total += weight;
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);
    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}
From source file:classify.Classifier.java
public static void missingValuesRows(Instances data) { int[] missingValues = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { missingValues[i] = 0;//from w w w . ja v a 2 s. c o m } Instance example; String value = ""; //get number of missing attributes per row int missValues = 0; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i); for (int j = 0; j < 15; j++) { if (example.attribute(j).isNominal()) { value = example.stringValue(j); } else if (example.attribute(j).isNumeric()) { value = Double.toString(example.value(j)); } if (value.equals("?") || value.equals("NaN")) { missingValues[i]++; missValues++; } } } System.out.println("Number of Missing Values: " + missValues); //get how many times i attributes are missing int[] frequency = new int[15]; for (int i = 0; i < data.numInstances(); i++) { frequency[missingValues[i]]++; } int numRows = 0; for (int i = 0; i < data.numInstances(); i++) { if (missingValues[i] > 0) { numRows++; } } System.out.println("Number of rows with missing values: " + numRows); System.out.println("Number of missing attributes per row:"); for (int i = 0; i < 15; i++) { System.out.println(i + ": " + frequency[i]); } }