List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
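Before the collected examples below, here is a minimal, self-contained sketch of the call itself. The ARFF path and the attribute name are placeholders, not taken from any of the examples: attribute(String name) returns the Attribute with the given name, or null if no attribute has that name, while attribute(int index) returns the attribute at a position.

import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instances;

public class AttributeLookupSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset from ARFF (the path is a placeholder).
        Instances data = new Instances(new FileReader("weather.arff"));

        // Look an attribute up by name; returns null if the dataset has no attribute of that name.
        Attribute byName = data.attribute("temperature");
        if (byName != null) {
            System.out.println(byName.name() + " has index " + byName.index()
                    + " and is numeric: " + byName.isNumeric());
        }

        // The overload attribute(int index) returns the attribute at a given position.
        Attribute byIndex = data.attribute(0);
        System.out.println("First attribute: " + byIndex.name());
    }
}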
From source file:adams.ml.data.WekaConverter.java
License:Open Source License
/**
 * Turns an ADAMS dataset row into a Weka Instance.
 *
 * @param data	the dataset to use as template
 * @param row	the row to convert
 * @return		the generated instance
 * @throws Exception	if conversion fails
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
    Instance result;
    double[] values;
    int i;
    Cell cell;
    Attribute att;

    values = new double[data.numAttributes()];
    for (i = 0; i < data.numAttributes(); i++) {
        values[i] = Utils.missingValue();
        if (!row.hasCell(i))
            continue;
        cell = row.getCell(i);
        if (cell.isMissing())
            continue;
        att = data.attribute(i);
        switch (att.type()) {
        case Attribute.NUMERIC:
            values[i] = cell.toDouble();
            break;
        case Attribute.DATE:
            values[i] = cell.toAnyDateType().getTime();
            break;
        case Attribute.NOMINAL:
            values[i] = att.indexOfValue(cell.getContent());
            break;
        case Attribute.STRING:
            values[i] = att.addStringValue(cell.getContent());
            break;
        default:
            throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att));
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(data);

    return result;
}
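The switch above is the core pattern for filling a double[] from typed cells: numeric values are stored directly, nominal values as the index of their label, and string values as an index into the attribute's string pool. As a stand-alone illustration, here is a minimal sketch assuming the list-based Weka 3.7+ API; the attribute names and values are invented for the illustration and are not part of the example above.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class EncodeValuesSketch {
    public static void main(String[] args) {
        // Build a tiny template dataset: numeric, nominal, string (names are illustrative).
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("red");
        labels.add("green");
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("weight"));
        atts.add(new Attribute("color", labels));
        atts.add(new Attribute("comment", (ArrayList<String>) null)); // null value list = string attribute
        Instances data = new Instances("template", atts, 0);

        double[] values = new double[data.numAttributes()];
        values[0] = 1.5;                                            // numeric: stored directly
        values[1] = data.attribute("color").indexOfValue("green");  // nominal: stored as label index
        values[2] = data.attribute("comment").addStringValue("ok"); // string: index into the string pool

        Instance inst = new DenseInstance(1.0, values);
        inst.setDataset(data); // without a dataset reference the instance has no attribute information
        System.out.println(inst);
    }
}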
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];

    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;

        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];
                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Move the centroid to its new coordinates. Generate the centroid
 * coordinates based on its members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 *
 * @param centroidIndex index of the centroid whose coordinates will be computed
 * @param members the objects that are assigned to the cluster of this centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    for (int j = 0; j < members.numAttributes(); j++) {
        // The centroid is the mean point. If the attribute is nominal, the centroid is the mode.
        if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
                || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
                || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
                || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir
                || members.attribute(j).isNominal()) {
            vals[j] = members.meanOrMode(j);
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Instance.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Instance.missingValue(); // mark mean as missing
                }
            }
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, vals));
    }

    return vals;
}
From source file:affective.core.ArffLexiconEvaluator.java
License:Open Source License
/**
 * Processes all the dictionary files.
 * @throws IOException an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    Instances lexInstances = new Instances(reader);

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

    List<Attribute> numericAttributes = new ArrayList<Attribute>();
    List<Attribute> nominalAttributes = new ArrayList<Attribute>();

    // checks all numeric and nominal attributes and discards the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {
        if (i != this.lexiconWordIndex.getIndex()) {
            if (lexInstances.attribute(i).isNumeric()) {
                numericAttributes.add(lexInstances.attribute(i));
                // adds the attribute name to the message-level features to be calculated
                this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name());
            } else if (lexInstances.attribute(i).isNominal()) {
                nominalAttributes.add(lexInstances.attribute(i));
                // adds the attribute name together with the nominal value to the message-level features to be calculated
                int numValues = lexInstances.attribute(i).numValues();
                for (int j = 0; j < numValues; j++)
                    this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name() + "-"
                            + lexInstances.attribute(i).value(j));
            }
        }
    }

    // Maps all words with their affective scores discarding missing values
    for (Instance inst : lexInstances) {
        if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
            String word = inst.stringValue(this.lexiconWordIndex.getIndex());
            // stems the word
            word = this.m_stemmer.stem(word);

            // map numeric scores
            if (!numericAttributes.isEmpty()) {
                Map<String, Double> wordVals = new HashMap<String, Double>();
                for (Attribute na : numericAttributes) {
                    if (!weka.core.Utils.isMissingValue(inst.value(na)))
                        wordVals.put(na.name(), inst.value(na));
                }
                this.numDict.put(word, wordVals);
            }

            // map nominal associations
            if (!nominalAttributes.isEmpty()) {
                Map<String, String> wordCounts = new HashMap<String, String>();
                for (Attribute no : nominalAttributes) {
                    if (!weka.core.Utils.isMissingValue(inst.value(no))) {
                        wordCounts.put(no.name(), no.value((int) inst.value(no)));
                    }
                    this.nomDict.put(word, wordCounts);
                }
            }
        }
    }
}
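In this example the word column is located through a configured index option. If the lexicon is known to store its words in a string attribute with a fixed name, the same column can be found with attribute(String name). A minimal sketch follows; the column name "word" and the file path are placeholders, not taken from the example above.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

public class LexiconWordColumnSketch {
    public static void main(String[] args) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader("lexicon.arff")); // placeholder path
        Instances lexInstances = new Instances(reader);
        reader.close();

        // Locate the word column by name instead of by a configured index.
        Attribute wordAtt = lexInstances.attribute("word"); // assumed column name
        if (wordAtt == null || !wordAtt.isString()) {
            throw new IllegalStateException("Expected a string attribute named 'word'");
        }

        for (Instance inst : lexInstances) {
            String word = inst.stringValue(wordAtt.index());
            System.out.println(word);
        }
    }
}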
From source file:affective.core.ArffLexiconWordLabeller.java
License:Open Source License
/**
 * Processes all the dictionary files.
 * @throws IOException an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    Instances lexInstances = new Instances(reader);

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

    // checks all numeric and nominal attributes and discards the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {
        if (i != this.lexiconWordIndex.getIndex()) {
            if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) {
                this.attributes.add(lexInstances.attribute(i));
            }
        }
    }

    // Maps all words with their affective scores discarding missing values
    for (Instance inst : lexInstances) {
        if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
            String word = inst.stringValue(this.lexiconWordIndex.getIndex());
            // stems the word
            word = this.m_stemmer.stem(word);

            // map numeric scores
            if (!attributes.isEmpty()) {
                Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>();
                for (Attribute na : attributes) {
                    wordVals.put(na, inst.value(na));
                }
                this.attValMap.put(word, wordVals);
            }
        }
    }
}
From source file:AnDE.wdAnDEonline.java
License:Open Source License
@Override
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances.deleteWithMissingClass();

    nInstances = instances.numInstances();
    nAttributes = instances.numAttributes() - 1;
    nc = instances.numClasses();

    probs = new double[nc];

    paramsPerAtt = new int[nAttributes];
    for (int u = 0; u < nAttributes; u++) {
        paramsPerAtt[u] = instances.attribute(u).numValues();
    }

    /*
     * Initialize structure array based on m_S
     */
    if (m_S.equalsIgnoreCase("A0DE")) { // A0DE
        numTuples = 0;
    } else if (m_S.equalsIgnoreCase("A1DE")) { // A1DE
        numTuples = 1;
    } else if (m_S.equalsIgnoreCase("A2DE")) { // A2DE
        numTuples = 2;
    }

    /*
     * ----------------------------------------------------------------------------------------
     * Start Parameter Learning Process
     * ----------------------------------------------------------------------------------------
     */

    int scheme = 1;

    /*
     * ---------------------------------------------------------------------------------------------
     * Initialize data structure
     * ---------------------------------------------------------------------------------------------
     */
    scheme = plTechniques.MAP;

    logDComputer = LogDistributionComputerAnDE.getDistributionComputer(numTuples, scheme);

    dParameters_ = new wdAnDEParametersFlat(nAttributes, nc, nInstances, paramsPerAtt, scheme, numTuples, m_MVerb);

    if (m_MVerb)
        System.out.println("All data structures are initialized. Starting to estimate parameters.");

    if (nInstances > 0) {
        for (int i = 0; i < nInstances; i++) {
            Instance instance = instances.instance(i);
            dParameters_.updateFirstPass(instance);
        }
    }
}
From source file:app.RunApp.java
License:Open Source License
/**
 * Action when the attributes table is clicked with the left mouse button
 *
 * @param evt Event
 */
private void tableAttributesLeftMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_tableAttributesLeftMouseClicked
    if (tabsAttributes.getSelectedIndex() == 0) {
        int selected = tableAttributesLeft.getSelectedRow();
        String attr = tableAttributesLeft.getValueAt(selected, 0).toString();

        Instances instances = dataset.getDataSet();
        Attribute currentAttr = instances.attribute(attr);

        double[] attributeValues = instances.attributeToDoubleArray(currentAttr.index());
        HeapSort.sort(attributeValues);

        attributesBoxDiagram2.getChart().setTitle(currentAttr.name());
        attributesBoxDiagram2.getChart().getXYPlot().clearAnnotations();
        ChartUtils.updateXYChart(attributesBoxDiagram2, HeapSort.getSortedArray());
    }
}
From source file:app.RunApp.java
License:Open Source License
/**
 * Generates TableModel for attributes
 *
 * @param jtable Table
 * @param dataset Multi-label dataset
 * @return Generated TableModel
 */
private TableModel attributesTableModel(JTable jtable, MultiLabelInstances dataset) {
    DefaultTableModel tableModel = new DefaultTableModel() {
        @Override
        public boolean isCellEditable(int row, int column) {
            // This causes all cells to be not editable
            return false;
        }
    };

    tableModel.addColumn("Attribute");

    Object[] row = new Object[1];

    Instances instances = dataset.getDataSet();
    int numLabels = dataset.getNumLabels();
    int numAttributes = instances.numAttributes() - numLabels;

    Attribute att;
    for (int i = 0; i < numAttributes; i++) {
        att = instances.attribute(i);
        if (att.isNumeric()) {
            row[0] = att.name();
            tableModel.addRow(row);
        }
    }

    jtable.setModel(tableModel);

    return jtable.getModel();
}
From source file:arffcreator.arffFrame.java
private void createActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_createActionPerformed
    // TODO add your handling code here:
    FastVector atts;
    FastVector attsRel;
    FastVector attVals;
    FastVector attValsRel;
    Instances data;
    Instances dataRel;
    double[] vals;
    double[] valsRel;
    int i;

    // 1. set up attributes
    atts = new FastVector();
    // - numeric
    atts.addElement(new Attribute("att1"));
    // - nominal
    attVals = new FastVector();
    for (i = 0; i < 5; i++)
        attVals.addElement("val" + (i + 1));
    atts.addElement(new Attribute("att2", attVals));
    // - string
    atts.addElement(new Attribute("att3", (FastVector) null));
    // - date
    atts.addElement(new Attribute("att4", "yyyy-MM-dd"));
    // - relational
    attsRel = new FastVector();
    // -- numeric
    attsRel.addElement(new Attribute("att5.1"));
    // -- nominal
    attValsRel = new FastVector();
    for (i = 0; i < 5; i++)
        attValsRel.addElement("val5." + (i + 1));
    attsRel.addElement(new Attribute("att5.2", attValsRel));
    dataRel = new Instances("att5", attsRel, 0);
    atts.addElement(new Attribute("att5", dataRel, 0));

    // 2. create Instances object
    data = new Instances("MyRelation", atts, 0);

    // 3. fill with data
    // first instance
    vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal
    vals[1] = attVals.indexOf("val3");
    // - string
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 1;
    valsRel[1] = attValsRel.indexOf("val5.3");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 2;
    valsRel[1] = attValsRel.indexOf("val5.2");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));

    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 1;
    valsRel[1] = attValsRel.indexOf("val5.4");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 2;
    valsRel[1] = attValsRel.indexOf("val5.1");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));

    // 4. output data
    textArea.append(data.toString());
    dataset = data.toString();
}
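The example above uses the older Weka 3.6-style API (FastVector and the Instance constructor). For comparison, here is a condensed sketch of the same construction with the list-based API used in Weka 3.7 and later, covering only numeric, nominal and string attributes; the date and relational parts are omitted for brevity.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class CreateArffModernApiSketch {
    public static void main(String[] args) {
        // 1. set up attributes (numeric, nominal, string)
        ArrayList<String> attVals = new ArrayList<String>();
        for (int i = 0; i < 5; i++)
            attVals.add("val" + (i + 1));

        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("att1"));                           // numeric
        atts.add(new Attribute("att2", attVals));                  // nominal
        atts.add(new Attribute("att3", (ArrayList<String>) null)); // string

        // 2. create the Instances object
        Instances data = new Instances("MyRelation", atts, 0);

        // 3. fill with one row
        double[] vals = new double[data.numAttributes()];
        vals[0] = Math.PI;
        vals[1] = data.attribute("att2").indexOfValue("val3");
        vals[2] = data.attribute("att3").addStringValue("This is a string!");
        data.add(new DenseInstance(1.0, vals));

        // 4. output the ARFF representation
        System.out.println(data);
    }
}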
From source file:arffGenerator.TextDirectoryToArff.java
License:Open Source License
/**
 * Creates instances from the plain text found in the files inside the folders of the given path.
 * The instances have two attributes: a nominal with the class (the names of the folders that
 * contain the files) and a String with the text that appears in the files.
 * @param clase - String indicating the possible class values in the ARFF file to be created.
 * @param directoryPath - String indicating the path of the directory containing the files/directories with the instances.
 * @param data - Set of instances forming the structure in which the new instances are stored.
 */
private void cargarAtrribDeClase(String clase, String directoryPath, Instances data) {
    System.out.println("Se crean las instancias de la clase: " + clase); // "Creating the instances of class: ..."
    File dir = new File(directoryPath);
    String[] files = dir.list();

    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt")) {
            try {
                double[] newInst = new double[2];
                File txt = new File(directoryPath + File.separator + files[i]);
                InputStreamReader is;
                is = new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    c = (char) c;
                    if (!isFakeChar((char) c))
                        txtStr.append((char) c);
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (clase == null) {
                    newInst[1] = Double.NaN;
                } else {
                    newInst[1] = (double) data.attribute(1).indexOfValue(clase);
                }
                data.add(new Instance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    System.out.println("Se crearon las instancias de la clase: " + clase); // "Created the instances of class: ..."
}