List of usage examples for weka.core.Instances.numAttributes()

public int numAttributes()
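Before the examples below, a minimal sketch of the call itself (the file name is hypothetical; any ARFF/CSV file Weka can load works). Note that numAttributes() counts every attribute, including the class attribute once one is set:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // hypothetical file
        data.setClassIndex(data.numAttributes() - 1);  // common idiom: class is the last attribute
        System.out.println("Total attributes (incl. class): " + data.numAttributes());
        for (int j = 0; j < data.numAttributes(); j++) {
            System.out.println(j + ": " + data.attribute(j).name());
        }
    }
}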
From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java
License:Open Source License
/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector.
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);

    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }
    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}
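The loops above repeatedly skip the class column while walking indices 0..numAttributes()-1. A minimal sketch of that pattern in isolation (the class and method names are illustrative, not from the source):

import weka.core.Instances;

public final class AttributeCounting {
    // Counts predictor attributes only; equals numAttributes() - 1 once a class index is set.
    public static int numPredictors(Instances data) {
        int count = 0;
        for (int j = 0; j < data.numAttributes(); j++) {
            if (j != data.classIndex()) { // classIndex() is -1 if no class is set
                count++;
            }
        }
        return count;
    }
}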
From source file:br.com.ufu.lsi.rebfnetwork.RBFNetwork.java
License:Open Source License
/**
 * Builds the classifier
 *
 * @param instances the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    } else {
        m_ZeroR = null;
    }

    m_standardize = new Standardize();
    m_standardize.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_standardize);

    SimpleKMeans sk = new SimpleKMeans();
    sk.setNumClusters(m_numClusters);
    sk.setSeed(m_clusteringSeed);
    MakeDensityBasedClusterer dc = new MakeDensityBasedClusterer();
    dc.setClusterer(sk);
    dc.setMinStdDev(m_minStdDev);
    m_basisFilter = new ClusterMembership();
    m_basisFilter.setDensityBasedClusterer(dc);
    m_basisFilter.setInputFormat(instances);
    Instances transformed = Filter.useFilter(instances, m_basisFilter);

    if (instances.classAttribute().isNominal()) {
        m_linear = null;
        m_logistic = new Logistic();
        m_logistic.setRidge(m_ridge);
        m_logistic.setMaxIts(m_maxIts);
        m_logistic.buildClassifier(transformed);
    } else {
        m_logistic = null;
        m_linear = new LinearRegression();
        m_linear.setAttributeSelectionMethod(
                new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION));
        m_linear.setRidge(m_ridge);
        m_linear.buildClassifier(transformed);
    }
}
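A hypothetical driver for a classifier like the one above (it assumes RBFNetwork from the same br.com.ufu.lsi.rebfnetwork package is on the classpath); setClassIndex(numAttributes() - 1) is the standard Weka idiom for data whose class is the last column:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TrainDriver {
    public static void main(String[] args) throws Exception {
        Instances train = DataSource.read("train.arff"); // hypothetical file
        train.setClassIndex(train.numAttributes() - 1);
        RBFNetwork rbf = new RBFNetwork(); // the classifier defined above
        rbf.buildClassifier(train);
        System.out.println(rbf);
    }
}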
From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java
License:Open Source License
/**
 * Initializes a document frequency attribute evaluator.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numAttributes = data.numAttributes();

    m_DFs = new int[numAttributes];
    Enumeration e = data.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance instance = (Instance) e.nextElement();
        int numValues = instance.numValues();
        for (int valueIndex = 0; valueIndex < numValues; valueIndex++) {
            int attIndex = instance.index(valueIndex);
            if (attIndex != classIndex) {
                double value = instance.valueSparse(valueIndex);
                // missing values are treated as 0 here
                if (m_missingAsZero) {
                    // one could also use isMissingSparse(valueIndex), or, less
                    // efficiently, isMissing(attIndex)
                    if (!Instance.isMissingValue(value) && value != 0.0) {
                        m_DFs[attIndex]++;
                        // m_DFs[attIndex] += value;
                    }
                } else {
                    if (value != 0.0) {
                        m_DFs[attIndex]++;
                        // m_DFs[attIndex] += value;
                    }
                }
            }
        }
    }
}
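The evaluator iterates instance.numValues() rather than data.numAttributes(), so on sparse data only the stored (typically non-zero) entries are visited. A minimal sketch of that traversal in isolation (the helper name is assumed, not from the source):

import weka.core.Instance;

public final class SparseTraversal {
    // Counts an instance's stored non-zero entries, skipping the class column,
    // the same way buildEvaluator() above walks sparse data.
    public static int countNonZero(Instance instance, int classIndex) {
        int count = 0;
        for (int v = 0; v < instance.numValues(); v++) { // stored entries only
            int attIndex = instance.index(v);            // sparse position -> attribute index
            if (attIndex != classIndex && instance.valueSparse(v) != 0.0) {
                count++;
            }
        }
        return count;
    }
}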
From source file:br.fapesp.myutils.MyUtils.java
License:Open Source License
public static void print_dataset_as_matrix(Instances data) {
    for (int i = 0; i < data.numInstances(); i++) {
        for (int j = 0; j < data.numAttributes(); j++)
            System.out.print(data.instance(i).value(j) + " ");
        System.out.println();
    }
}
From source file:br.fapesp.myutils.MyUtils.java
License:Open Source License
/**
 * Convert an Instances data set to a doubles matrix.
 *
 * @param data
 * @return data as a double array
 */
public static double[][] convertInstancesToDoubleMatrix(Instances data) {
    int N = data.numInstances();
    int m = data.numAttributes();
    double[][] ddata = new double[N][m];
    double[] temp;
    for (int i = 0; i < N; i++) {
        temp = data.instance(i).toDoubleArray();
        for (int j = 0; j < m; j++)
            ddata[i][j] = temp[j];
    }
    return (ddata);
}
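Hypothetical usage of the two MyUtils helpers above; the matrix dimensions come straight from numInstances() and numAttributes():

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class MatrixDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // hypothetical file
        double[][] m = MyUtils.convertInstancesToDoubleMatrix(data);
        System.out.println(m.length + " x " + m[0].length); // numInstances() x numAttributes()
        MyUtils.print_dataset_as_matrix(data);
    }
}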
From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public Instances parseData(Object objData) { try {/*from www .jav a 2 s.c om*/ Instances dataInstance; DataBag values = (DataBag) objData; int numAttributes = values.iterator().next().size(); // N_Features + 1 Class int bagSize = 0; // To set the number of train samples // To find the number of samples (instances in a bag) for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { it.next(); bagSize = bagSize + 1; } // Code for find the different classes names in the input String[] inputClass = new String[bagSize]; // String vector with the samples class's names int index = 0; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple tuple = it.next(); inputClass[index] = DataType.toString(tuple.get(numAttributes - 1)); index = index + 1; } HashSet classSet = new HashSet(Arrays.asList(inputClass)); String[] classValue = (String[]) classSet.toArray(new String[0]); // To set the classes names in the attribute for the instance FastVector classNames = new FastVector(); for (int i = 0; i < classValue.length; i++) classNames.addElement(classValue[i]); // Creating the instance model N_Features + 1_ClassNames FastVector atts = new FastVector(); for (int i = 0; i < numAttributes - 1; i++) atts.addElement(new Attribute("att" + i)); dataInstance = new Instances("MyRelation", atts, numAttributes); dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1); // To set the instance values for the dataInstance model created Instance tmpData = new DenseInstance(numAttributes); index = 0; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple tuple = it.next(); for (int i = 0; i < numAttributes - 1; i++) tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(tuple.get(i))); //tmpData.setValue((weka.core.Attribute) atts.elementAt(numAttributes-1), DataType.toString(tuple.get(numAttributes-1))); dataInstance.add(tmpData); dataInstance.instance(index).setValue(numAttributes - 1, DataType.toString(tuple.get(numAttributes - 1))); index = index + 1; } // Setting the class index dataInstance.setClassIndex(dataInstance.numAttributes() - 1); return dataInstance; } catch (Exception e) { System.err.println("Failed to process input; error - " + e.getMessage()); return null; } }
From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public Instances parseData(BufferedReader buff) { try {//ww w. ja va 2 s . c o m Instances dataInstance; //DataBag values = (DataBag)objData; int numAttributes = 0; // N_Features + 1 Class List<String> inputClass = new ArrayList<String>(); List<String[]> dataset = new ArrayList<String[]>(); // To find the number of samples (instances in a bag) String line; while ((line = buff.readLine()) != null) { if (!line.isEmpty()) { String[] data = line.split(","); if (numAttributes == 0) numAttributes = data.length; inputClass.add(data[data.length - 1]); dataset.add(data); } } HashSet classSet = new HashSet(inputClass); String[] classValue = (String[]) classSet.toArray(new String[0]); // To set the classes names in the attribute for the instance FastVector classNames = new FastVector(); for (int i = 0; i < classValue.length; i++) classNames.addElement(classValue[i]); // Creating the instance model N_Features + 1_ClassNames FastVector atts = new FastVector(); for (int i = 0; i < numAttributes - 1; i++) atts.addElement(new Attribute("att" + i)); dataInstance = new Instances("MyRelation", atts, numAttributes); dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1); // To set the instance values for the dataInstance model created Instance tmpData = new DenseInstance(numAttributes); int index = 0; for (int k = 0; k < dataset.size(); k++) { for (int i = 0; i < numAttributes - 1; i++) tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(dataset.get(k)[i])); //tmpData.setValue((weka.core.Attribute) atts.elementAt(numAttributes-1), DataType.toString(tuple.get(numAttributes-1))); dataInstance.add(tmpData); dataInstance.instance(index).setValue(numAttributes - 1, DataType.toString(dataset.get(k)[numAttributes - 1])); index = index + 1; } // Setting the class index dataInstance.setClassIndex(dataInstance.numAttributes() - 1); return dataInstance; } catch (Exception e) { System.err.println("Failed to process input; error - " + e.getMessage()); return null; } }
From source file:br.ufpe.cin.mpos.offload.DynamicDecisionSystem.java
License:Apache License
public synchronized boolean isRemoteAdvantage(int InputSize, Remotable.Classifier classifierRemotable) {
    boolean resp = false;
    try {
        if ((!(this.classifierModel.equals(classifierRemotable.toString()))) || this.classifier == null) {
            Log.d("classificacao", "classificador=" + classifierRemotable.toString());
            this.classifierModel = classifierRemotable.toString();
            loadClassifier(classifierRemotable);
        }
        Cursor c = dc.getData();
        int colunas = c.getColumnCount();
        Instance instance = new DenseInstance(colunas - 2);
        ArrayList<String> values = new ArrayList<String>();
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        if (c.moveToFirst()) {
            for (int i = 1; i <= colunas - 2; i++) {
                String feature = c.getColumnName(i);
                String value = c.getString(i);
                Attribute attribute;
                if (feature.equals(DatabaseManager.InputSize)) {
                    values.add("" + InputSize);
                    attribute = new Attribute(DatabaseManager.InputSize);
                } else {
                    String[] strings = populateAttributes(i);
                    ArrayList<String> attValues = new ArrayList<String>(Arrays.asList(strings));
                    attribute = new Attribute(feature, attValues);
                    if (value != null) {
                        values.add(value);
                    }
                }
                atts.add(attribute);
            }
            Instances instances = new Instances("header", atts, atts.size());
            instances.setClassIndex(instances.numAttributes() - 1);
            instance.setDataset(instances);
            for (int i = 0; i < atts.size(); i++) {
                if (i == 9) {
                    instance.setMissing(atts.get(9));
                } else if (atts.get(i).name().equals(DatabaseManager.InputSize)) {
                    instance.setValue(atts.get(i), InputSize);
                } else {
                    instance.setValue(atts.get(i), values.get(i));
                }
            }
            double value = -1;
            value = classifier.distributionForInstance(instance)[0];
            Log.d("classificacao", instance.toString() + " classifiquei com o seguinte valor" + value);
            resp = (0.7 <= value);
            if (resp) {
                Log.d("classificacao", "sim");
                Log.d("Finalizado", "classifiquei " + instance.toString() + " com sim");
            } else {
                Log.d("classificacao", "nao");
                Log.d("Finalizado", "classifiquei " + instance.toString() + " com nao");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Log.e("sqlLite", e.getMessage());
        Log.e("sqlLite", "Causa: " + e.getCause());
    }
    return resp;
}
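The key pattern above is a header-only Instances object: an Instance must be attached to a dataset via setDataset() before distributionForInstance can interpret its attribute and class structure. A stripped-down sketch of that pattern (attribute and label names here are assumptions, not from the source):

import java.util.ArrayList;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public final class HeaderOnlyClassification {
    // Scores one numeric feature against an already-trained classifier.
    public static double scoreFirstClass(Classifier classifier, double x) throws Exception {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("inputSize"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("local");
        labels.add("remote");
        atts.add(new Attribute("decision", labels));
        Instances header = new Instances("header", atts, 0);
        header.setClassIndex(header.numAttributes() - 1);

        Instance inst = new DenseInstance(header.numAttributes());
        inst.setDataset(header); // required before classification
        inst.setValue(atts.get(0), x);
        inst.setClassMissing();
        return classifier.distributionForInstance(inst)[0];
    }
}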
From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java
License:Open Source License
public void buildClusterer(Instances data) throws Exception {

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder)
        m_Assignments = clusterAssignments;

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder)
        initInstances = new Instances(instances);
    else
        initInstances = instances;

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (m_Iterations == m_MaxIterations)
            converged = true;

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}
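A hypothetical driver for the clusterer above, assuming it mirrors the stock weka.clusterers.SimpleKMeans API it is based on (setNumClusters, getClusterCentroids); note that clusterers expect data with no class index set:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClusterDriver {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // hypothetical file, no class index set
        SimpleKMeansIaProject km = new SimpleKMeansIaProject();
        km.setNumClusters(3);
        km.buildClusterer(data);
        System.out.println(km.getClusterCentroids()); // one centroid row per cluster
    }
}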
From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java
License:Open Source License
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    // used only for Manhattan Distance
    Instances sortedMembers = null;
    int middle = 0;
    boolean dataIsEven = false;

    if (m_DistanceFunction instanceof ManhattanDistance) {
        middle = (members.numInstances() - 1) / 2;
        dataIsEven = ((members.numInstances() % 2) == 0);
        if (m_PreserveOrder) {
            sortedMembers = members;
        } else {
            sortedMembers = new Instances(members);
        }
    }

    for (int j = 0; j < members.numAttributes(); j++) {

        // in case of Euclidean distance the centroid is the mean point
        // in case of Manhattan distance the centroid is the median point
        // in both cases, if the attribute is nominal, the centroid is the mode
        if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) {
            vals[j] = members.meanOrMode(j);
        } else if (m_DistanceFunction instanceof ManhattanDistance) {
            // singleton special case
            if (members.numInstances() == 1) {
                vals[j] = members.instance(0).value(j);
            } else {
                sortedMembers.kthSmallestValue(j, middle + 1);
                vals[j] = sortedMembers.instance(middle).value(j);
                if (dataIsEven) {
                    sortedMembers.kthSmallestValue(j, middle + 2);
                    vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2;
                }
            }
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Utils.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Utils.missingValue(); // mark mean as missing
                }
            }
        }
    }
    if (updateClusterInfo)
        m_ClusterCentroids.add(new DenseInstance(1.0, vals));

    return vals;
}
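The Manhattan-distance branch above uses Instances.kthSmallestValue to locate the per-attribute median. A minimal standalone sketch of that computation (the class and method names are assumed, not from the source):

import weka.core.Instances;

public final class MedianHelper {
    // Median of a numeric attribute via Instances.kthSmallestValue (k is 1-based).
    // Assumes insts has at least one instance and attIndex is numeric.
    public static double median(Instances insts, int attIndex) {
        int n = insts.numInstances();
        double lower = insts.kthSmallestValue(attIndex, (n + 1) / 2);
        if (n % 2 == 1) {
            return lower;
        }
        double upper = insts.kthSmallestValue(attIndex, n / 2 + 1);
        return (lower + upper) / 2.0;
    }
}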