List of usage examples for weka.core Instances instance
publicInstance instance(int index)
From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java
License:Open Source License
/** * Callback for best measure so far//from w w w . j a v a2 s . c o m */ @Override public void newBest(double val, OptData opd) { int cnt = 0; int[] weights = getWeights(opd); Instances newInstances = new Instances(getInstances()); for (int i = 0; i < getInstances().numInstances(); i++) { Instance in = newInstances.instance(i); cnt = 0; for (int a = 0; a < getInstances().numAttributes(); a++) { if (a == getInstances().classIndex()) continue; if (weights[cnt++] == 0) { in.setValue(a, 0); } else { in.setValue(a, in.value(a)); } } } try { File file = new File(getOutputDirectory().getAbsolutePath() + File.separator + Double.toString(getMeasure().adjust(val)) + ".arff"); file.createNewFile(); Writer writer = new BufferedWriter(new FileWriter(file)); Instances header = new Instances(newInstances, 0); // remove filter setup Remove remove = new Remove(); remove.setAttributeIndices(getRemoveAsString(weights)); remove.setInvertSelection(true); header.setRelationName(OptionUtils.getCommandLine(remove)); writer.write(header.toString()); writer.write("\n"); for (int i = 0; i < newInstances.numInstances(); i++) { writer.write(newInstances.instance(i).toString()); writer.write("\n"); } writer.flush(); writer.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
/** * Generates a clusterer. Has to initialize all fields of the clusterer * that are not being set via options./* w w w . jav a 2 s. c om*/ * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been * generated successfully */ public void buildClusterer(Instances data) throws Exception { if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; // can clusterer handle the data? getCapabilities().testWithFail(data); //long start = System.currentTimeMillis(); m_ReplaceMissingFilter = new ReplaceMissingValues(); // Missing values replacement is not required so this modification is made /*m_ReplaceMissingFilter.setInputFormat(data); m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);*/ Instances m_instances = new Instances(data); // To use semantic measurers through DistanceFunction interface m_DistanceFunction.setInstances(m_instances); initMinMax(m_instances); m_ClusterCentroids = new Instances(m_instances, m_NumClusters); int n = m_instances.numInstances(); Random r = new Random(getSeed()); boolean[] selected = new boolean[n]; double[] minDistance = new double[n]; for (int i = 0; i < n; i++) minDistance[i] = Double.MAX_VALUE; int firstI = r.nextInt(n); m_ClusterCentroids.add(m_instances.instance(firstI)); selected[firstI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI)); if (m_NumClusters > n) m_NumClusters = n; for (int i = 1; i < m_NumClusters; i++) { int nextI = farthestAway(minDistance, selected); m_ClusterCentroids.add(m_instances.instance(nextI)); selected[nextI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI)); } m_instances = new Instances(m_instances, 0); //long end = System.currentTimeMillis(); //System.out.println("Clustering Time = " + (end-start)); // Save memory!! m_DistanceFunction.clean(); if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
protected void updateMinDistance(double[] minDistance, boolean[] selected, Instances data, Instance center) { for (int i = 0; i < selected.length; i++) if (!selected[i]) { // Another modification to use DistanceFunction // double d = distance(center,data.instance(i)); double d = m_DistanceFunction.distance(center, data.instance(i)); if (d < minDistance[i]) minDistance[i] = d;// www . j a v a2s . c o m } }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
protected void initMinMax(Instances data) { m_Min = new double[data.numAttributes()]; m_Max = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { m_Min[i] = m_Max[i] = Double.NaN; }//from w w w. j a v a 2 s.co m for (int i = 0; i < data.numInstances(); i++) { updateMinMax(data.instance(i)); } }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/** * Generates a clusterer. Has to initialize all fields of the clusterer that * are not being set via options.//from www . j a va 2 s . c o m * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been generated successfully */ @Override public void buildClusterer(Instances data) throws Exception { if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; // can clusterer handle the data? getCapabilities().testWithFail(data); m_Iterations = 0; m_ReplaceMissingFilter = new ReplaceMissingValues(); Instances instances = new Instances(data); instances.setClassIndex(-1); if (!m_dontReplaceMissing) { m_ReplaceMissingFilter.setInputFormat(instances); instances = Filter.useFilter(instances, m_ReplaceMissingFilter); } m_FullMissingCounts = new int[instances.numAttributes()]; if (m_displayStdDevs) { m_FullStdDevs = new double[instances.numAttributes()]; } m_FullNominalCounts = new int[instances.numAttributes()][0]; m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false); for (int i = 0; i < instances.numAttributes(); i++) { m_FullMissingCounts[i] = instances.attributeStats(i).missingCount; if (instances.attribute(i).isNumeric()) { if (m_displayStdDevs) { m_FullStdDevs[i] = Math.sqrt(instances.variance(i)); } if (m_FullMissingCounts[i] == instances.numInstances()) { m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean } } else { m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts; if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) { m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common // value } } } m_ClusterCentroids = new Instances(instances, m_NumClusters); int[] clusterAssignments = new int[instances.numInstances()]; if (m_PreserveOrder) { m_Assignments = clusterAssignments; } m_DistanceFunction.setInstances(instances); Random RandomO = new Random(getSeed()); int instIndex; HashMap initC = new HashMap(); DecisionTableHashKey hk = null; Instances initInstances = null; if (m_PreserveOrder) { initInstances = new Instances(instances); } else { initInstances = instances; } for (int j = initInstances.numInstances() - 1; j >= 0; j--) { instIndex = RandomO.nextInt(j + 1); hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true); if (!initC.containsKey(hk)) { m_ClusterCentroids.add(initInstances.instance(instIndex)); initC.put(hk, null); } initInstances.swap(j, instIndex); if (m_ClusterCentroids.numInstances() == m_NumClusters) { break; } } m_NumClusters = m_ClusterCentroids.numInstances(); // removing reference initInstances = null; int i; boolean converged = false; int emptyClusterCount; Instances[] tempI = new Instances[m_NumClusters]; m_squaredErrors = new double[m_NumClusters]; m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0]; m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()]; while (!converged) { emptyClusterCount = 0; m_Iterations++; converged = true; for (i = 0; i < instances.numInstances(); i++) { Instance toCluster = instances.instance(i); int newC = clusterProcessedInstance(toCluster, true); if (newC != clusterAssignments[i]) { converged = false; } clusterAssignments[i] = newC; } // update centroids m_ClusterCentroids = new Instances(instances, m_NumClusters); for (i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(instances, 0); } for (i = 0; i < instances.numInstances(); i++) { tempI[clusterAssignments[i]].add(instances.instance(i)); } for (i = 0; i < m_NumClusters; i++) { if (tempI[i].numInstances() == 0) { // empty cluster emptyClusterCount++; } else { moveCentroid(i, tempI[i], true); } } if (m_Iterations == m_MaxIterations) { converged = true; } if (emptyClusterCount > 0) { m_NumClusters -= emptyClusterCount; if (converged) { Instances[] t = new Instances[m_NumClusters]; int index = 0; for (int k = 0; k < tempI.length; k++) { if (tempI[k].numInstances() > 0) { t[index] = tempI[k]; for (i = 0; i < tempI[k].numAttributes(); i++) { m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i]; } index++; } } tempI = t; } else { tempI = new Instances[m_NumClusters]; } } if (!converged) { m_squaredErrors = new double[m_NumClusters]; m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0]; } } if (m_displayStdDevs) { m_ClusterStdDevs = new Instances(instances, m_NumClusters); } m_ClusterSizes = new int[m_NumClusters]; for (i = 0; i < m_NumClusters; i++) { if (m_displayStdDevs) { double[] vals2 = new double[instances.numAttributes()]; for (int j = 0; j < instances.numAttributes(); j++) { if (instances.attribute(j).isNumeric()) { vals2[j] = Math.sqrt(tempI[i].variance(j)); } else { vals2[j] = Instance.missingValue(); } } m_ClusterStdDevs.add(new Instance(1.0, vals2)); } m_ClusterSizes[i] = tempI[i].numInstances(); } // Save memory!! m_DistanceFunction.clean(); if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; }
From source file:agnes.MyAgnes.java
public void buildClusterer(Instances data) { distanceCounter = new EuclideanDistance(data); ArrayList<ArrayList<Instance>> currentClusters = new ArrayList<>(); for (int i = 0; i < data.numInstances(); i++) { currentClusters.add(new ArrayList<>()); currentClusters.get(i).add(data.instance(i)); instanceID.put(data.instance(i), i); }/* w w w. j av a 2 s .c om*/ addNewClusterHierarchy(currentClusters); }
From source file:ai.BalancedRandomForest.java
License:GNU General Public License
/** * Build Balanced Random Forest//from w w w .ja va2s .c o m */ public void buildClassifier(final Instances data) throws Exception { // If number of features is 0 then set it to log2 of M (number of attributes) if (numFeatures < 1) numFeatures = (int) Utils.log2(data.numAttributes()) + 1; // Check maximum number of random features if (numFeatures >= data.numAttributes()) numFeatures = data.numAttributes() - 1; // Initialize array of trees tree = new BalancedRandomTree[numTrees]; // total number of instances final int numInstances = data.numInstances(); // total number of classes final int numClasses = data.numClasses(); final ArrayList<Integer>[] indexSample = new ArrayList[numClasses]; for (int i = 0; i < numClasses; i++) indexSample[i] = new ArrayList<Integer>(); //System.out.println("numClasses = " + numClasses); // fill indexSample with the indices of each class for (int i = 0; i < numInstances; i++) { //System.out.println("data.get("+i+").classValue() = " + data.get(i).classValue()); indexSample[(int) data.get(i).classValue()].add(i); } final Random random = new Random(seed); // Executor service to run concurrent trees final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees); final boolean[][] inBag = new boolean[numTrees][numInstances]; try { for (int i = 0; i < numTrees; i++) { final ArrayList<Integer> bagIndices = new ArrayList<Integer>(); // Randomly select the indices in a balanced way for (int j = 0; j < numInstances; j++) { // Select first the class final int randomClass = random.nextInt(numClasses); // Select then a random sample of that class final int randomSample = random.nextInt(indexSample[randomClass].size()); bagIndices.add(indexSample[randomClass].get(randomSample)); inBag[i][indexSample[randomClass].get(randomSample)] = true; } // Create random tree final Splitter splitter = new Splitter( new GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt()))); futures.add(exe.submit(new Callable<BalancedRandomTree>() { public BalancedRandomTree call() { return new BalancedRandomTree(data, bagIndices, splitter); } })); } // Grab all trained trees before proceeding for (int treeIdx = 0; treeIdx < numTrees; treeIdx++) tree[treeIdx] = futures.get(treeIdx).get(); // Calculate out of bag error final boolean numeric = data.classAttribute().isNumeric(); List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances()); for (int i = 0; i < data.numInstances(); i++) { VotesCollector aCollector = new VotesCollector(tree, i, data, inBag); votes.add(exe.submit(aCollector)); } double outOfBagCount = 0.0; double errorSum = 0.0; for (int i = 0; i < data.numInstances(); i++) { double vote = votes.get(i).get(); // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else { if (vote != data.instance(i).classValue()) errorSum += data.instance(i).weight(); } } outOfBagError = errorSum / outOfBagCount; } catch (Exception ex) { ex.printStackTrace(); } finally { exe.shutdownNow(); } }
From source file:analysis.SilhouetteIndex.java
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception { //Map<Integer, Instances> clustermap = sk.clusterInstance; sk.setNumClusters(c);//from w ww.j a v a 2s .co m sk.buildClusterer(inst); EuclideanDistance ed = new EuclideanDistance(); double avgSilhouetteOverAllPoints = 0.d; if (sk.getNumClusters() == 1) { //Index is not defined for k=1. needs at least 2 clusters return Double.NaN; } for (int i = 0; i < inst.numInstances(); i++) { //for the current element get its cluster int currentcluster = sk.clusterInstance(inst.instance(i)); //System.out.println(inst.instance(i).value(2)); double[] current_attr = new double[inst.numAttributes()]; double[] other_attr = new double[inst.numAttributes()]; //get attributes of the current instance for (int attr = 0; attr < inst.numAttributes(); attr++) { current_attr[attr] = inst.instance(i).value(attr); } // int counter double[] distances = new double[sk.getNumClusters()]; int[] counters = new int[sk.getNumClusters()]; //System.out.println("distances: "+distances.length); double avgInClusterDist = 0, dist = 0; int countsamecluster = 0; distances[currentcluster] = Double.MAX_VALUE; for (int j = 0; j < inst.numInstances(); j++) { for (int attr = 0; attr < inst.numAttributes(); attr++) { other_attr[attr] = inst.instance(j).value(attr); } //get cluster number of j th element int clusternumber = sk.clusterInstance(inst.instance(j)); //check if j and i in the same cluster if (clusternumber == currentcluster) { if (inst.instance(i) != inst.instance(j)) { //calculate average dist to other elements in the cluster //inst. dist = ed.compute(current_attr, other_attr); avgInClusterDist = avgInClusterDist + dist; countsamecluster++; } } else { dist = ed.compute(current_attr, other_attr); distances[clusternumber] = distances[clusternumber] + dist; counters[clusternumber]++; } } //calculate value ai if (countsamecluster > 0) { avgInClusterDist = avgInClusterDist / countsamecluster; //this is value ai } //find average distances to other clusters for (int k = 0; k < distances.length; k++) { if (k != currentcluster) { distances[k] = distances[k] / counters[k]; } } //Find the min value of average distance to other clusters double min = distances[0]; for (int k = 1; k < distances.length; k++) { if (min > distances[k]) { min = distances[k]; } } //si for current element: double si; // if we only have one element in our cluster it makes sense to set // si = 0 if (countsamecluster == 1) { si = 0.0d; } else { si = (min - avgInClusterDist) / Math.max(min, avgInClusterDist); } avgSilhouetteOverAllPoints = avgSilhouetteOverAllPoints + si; } //System.out.println(inst.numInstances()); return avgSilhouetteOverAllPoints / inst.numInstances(); }
From source file:AnDE.wdAnDEonline.java
License:Open Source License
@Override public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances.deleteWithMissingClass();/* w w w. j a v a 2 s. co m*/ nInstances = instances.numInstances(); nAttributes = instances.numAttributes() - 1; nc = instances.numClasses(); probs = new double[nc]; paramsPerAtt = new int[nAttributes]; for (int u = 0; u < nAttributes; u++) { paramsPerAtt[u] = instances.attribute(u).numValues(); } /* * Initialize structure array based on m_S */ if (m_S.equalsIgnoreCase("A0DE")) { // A0DE numTuples = 0; } else if (m_S.equalsIgnoreCase("A1DE")) { // A1DE numTuples = 1; } else if (m_S.equalsIgnoreCase("A2DE")) { // A2DE numTuples = 2; } /* * ---------------------------------------------------------------------------------------- * Start Parameter Learning Process * ---------------------------------------------------------------------------------------- */ int scheme = 1; /* * --------------------------------------------------------------------------------------------- * Intitialize data structure * --------------------------------------------------------------------------------------------- */ scheme = plTechniques.MAP; logDComputer = LogDistributionComputerAnDE.getDistributionComputer(numTuples, scheme); dParameters_ = new wdAnDEParametersFlat(nAttributes, nc, nInstances, paramsPerAtt, scheme, numTuples, m_MVerb); if (m_MVerb) System.out.println("All data structures are initialized. Starting to estimate parameters."); if (nInstances > 0) { for (int i = 0; i < nInstances; i++) { Instance instance = instances.instance(i); dParameters_.updateFirstPass(instance); } } }
From source file:ann.ANN.java
public void percentageSplit(Classifier model, double percent, Instances data) { try {// w ww. j a v a 2 s . co m int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances train = new Instances(data, trainSize); Instances test = new Instances(data, testSize); ; for (int i = 0; i < trainSize; i++) { train.add(data.instance(i)); } for (int i = trainSize; i < data.numInstances(); i++) { test.add(data.instance(i)); } Evaluation eval = new Evaluation(train); eval.evaluateModel(model, test); System.out.println("================================"); System.out.println("========Percentage Split======="); System.out.println("================================"); System.out.println(eval.toSummaryString("\n=== Summary ===\n", false)); System.out.println(eval.toClassDetailsString("=== Detailed Accuracy By Class ===\n")); System.out.println(eval.toMatrixString("=== Confusion Matrix ===\n")); } catch (Exception ex) { System.out.println("File tidak berhasil di-load"); } }