List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Invokes filter to transform last parameter into a Nominal * /* ww w . j a v a 2 s . co m*/ * @param data * @return * @throws Exception */ public static Instances makeLastNominal(Instances data) throws Exception { Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } return newData; }
From source file:com.sliit.normalize.NormalizeDataset.java
/**
 * Normalizes the current dataset (reduced-dimension file, temp file, or the
 * original CSV — whichever exists, in that order), binarizes the class labels
 * ("normal." becomes "0", anything else "1") and writes the result out via
 * {@code writeToNewFile}.
 *
 * @return the expected path of the normalized output file, or "" on error
 */
public String normalizeDataset() {
    System.out.println("start normalizing data");
    String filePathOut = "";
    try {
        // Load from the most recently produced intermediate file, if any.
        CSVLoader loader = new CSVLoader();
        if (reducedDiemensionFile != null) {
            loader.setSource(reducedDiemensionFile);
        } else if (tempFIle != null && tempFIle.exists()) {
            loader.setSource(tempFIle);
        } else {
            loader.setSource(csvFile);
        }
        Instances dataInstance = loader.getDataSet();
        Normalize normalize = new Normalize();
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        normalize.setInputFormat(dataInstance);

        String directory = csvFile.getParent();
        outputFile = new File(directory + "/" + "normalized" + csvFile.getName());
        if (!outputFile.exists()) {
            outputFile.createNewFile();
        }
        CSVSaver saver = new CSVSaver();
        saver.setFile(outputFile);

        // BUGFIX: both loops originally started at i = 1 and silently dropped
        // the first instance from the normalized output. If the first row was
        // being skipped deliberately (e.g. a duplicated header), revert to 1.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            normalize.input(dataInstance.instance(i));
        }
        normalize.batchFinished();
        Instances outPut = new Instances(dataInstance, 0);
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            outPut.add(normalize.output());
        }

        // Binarize the class attribute: "normal." -> "0", everything else -> "1".
        Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1);
        for (int j = 0; j < attribute.numValues(); j++) {
            if (attribute.value(j).equals("normal.")) {
                outPut.renameAttributeValue(attribute, attribute.value(j), "0");
            } else {
                outPut.renameAttributeValue(attribute, attribute.value(j), "1");
            }
        }
        saver.setInstances(outPut);
        saver.writeBatch();
        writeToNewFile(directory);
        // NOTE(review): no '/' separator and a different prefix than outputFile
        // ("norm" vs "normalized") — confirm this matches what writeToNewFile
        // actually produces.
        filePathOut = directory + "norm" + csvFile.getName();

        // Clean up intermediate files.
        if (tempFIle != null) {
            tempFIle.delete();
        }
        if (reducedDiemensionFile != null) {
            reducedDiemensionFile.delete();
        }
        outputFile.delete();
    } catch (IOException e) {
        // Include the throwable so the stack trace is not lost.
        log.error("Error occurred:" + e.getMessage(), e);
    } catch (Exception e) {
        log.error("Error occurred:" + e.getMessage(), e);
    }
    return filePathOut;
}
From source file:com.sliit.normalize.NormalizeDataset.java
/**
 * Renames every value of the attributes selected by {@code values} to its
 * numeric position ("0", "1", ...) and writes the modified dataset to a
 * temporary CSV file next to the source file.
 *
 * @param values map whose keys select which indices to convert
 *               (NOTE(review): keys look like attribute indices, but the loop
 *               below runs over instances — confirm the intended bound)
 * @return true if the temp file was written successfully, false otherwise
 */
public boolean updateStringValues(Map<Integer, String> values) {
    System.out.println("updating String Values");
    boolean status = false;
    try {
        csv.setSource(csvFile);
        Instances dataInstance = csv.getDataSet();
        // NOTE(review): iterating up to numInstances() while indexing
        // ATTRIBUTES is suspicious; numAttributes() is probably meant.
        // Kept as-is to preserve behavior — confirm with callers.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            if (values.containsKey(i)) {
                Attribute attribute = dataInstance.attribute(i);
                for (int j = 0; j < attribute.numValues(); j++) {
                    dataInstance.renameAttributeValue(attribute, attribute.value(j), j + "");
                }
            }
        }
        tempFIle = new File(csvFile.getParent() + "/temp.csv");
        CSVSaver saver = new CSVSaver();
        saver.setInstances(dataInstance);
        saver.setFile(tempFIle);
        saver.writeBatch();
        // BUGFIX: the original never set status, so this method always
        // returned false even on success.
        status = true;
    } catch (IOException e) {
        log.error("Error occurred:" + e.getMessage(), e);
    }
    return status;
}
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/**
 * Returns the class probability distribution for the given instance.
 *
 * <p>Delegates to the ZeroR fallback model when one was built; otherwise maps
 * the instance into the clusterer's header space and converts the cluster
 * assignment (or cluster distribution) into class probabilities.
 *
 * @param instance the instance to be classified
 * @return the class probabilities (all zeros means "unclassified")
 * @throws Exception if an error occurred during the prediction
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    if (m_ZeroR != null) {
        // Degenerate training data: fall back to the ZeroR model.
        return m_ZeroR.distributionForInstance(instance);
    } else {
        double[] result = new double[instance.numClasses()];
        if (m_ActualClusterer != null) {
            // Build a new instance matching the clustering header (which has
            // no class attribute), copying all non-class attribute values.
            Instances tempData = m_ClusteringHeader.stringFreeStructure();
            double[] values = new double[tempData.numAttributes()];
            int n = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == instance.classIndex()) {
                    continue; // the clusterer never sees the class attribute
                }
                if (instance.attribute(i).isString()) {
                    // String values must be registered in the target header.
                    values[n] = tempData.attribute(n).addStringValue(instance.stringValue(i));
                } else if (instance.attribute(i).isRelationValued()) {
                    // Likewise for relational values.
                    values[n] = tempData.attribute(n).addRelation(instance.relationalValue(i));
                } else {
                    values[n] = instance.value(i);
                }
                n++;
            }
            Instance newInst = new DenseInstance(instance.weight(), values);
            newInst.setDataset(tempData);

            if (!getLabelAllClusters()) {
                // Hard assignment: map the winning cluster to its class.
                double r = m_ClustersToClasses[m_ActualClusterer.clusterInstance(newInst)];
                if (r == -1) {
                    return result; // Unclassified — cluster has no class label
                } else {
                    result[(int) r] = 1.0;
                    return result;
                }
            } else {
                // Soft assignment: weight each cluster's class-probability
                // table by the cluster membership probability.
                double[] classProbs = new double[instance.numClasses()];
                double[] dist = m_ActualClusterer.distributionForInstance(newInst);
                for (int i = 0; i < dist.length; i++) {
                    for (int j = 0; j < instance.numClasses(); j++) {
                        classProbs[j] += dist[i] * m_ClusterClassProbs[i][j];
                    }
                }
                Utils.normalize(classProbs);
                return classProbs;
            }
        } else {
            return result; // Unclassified — no clusterer was built
        }
    }
}
From source file:com.tum.classifiertest.DataCache.java
License:Open Source License
/**
 * Creates a DataCache by copying data from a weka.core.Instances object.
 *
 * <p>Values are stored attribute-major ({@code vals[attribute][instance]}) to
 * speed up access during random-forest splitting, and per-attribute sorted
 * instance indices are precomputed for the whole dataset.
 *
 * @param origData the dataset to copy
 * @throws Exception if the dataset contains attributes that are neither
 *                   numeric nor nominal
 */
public DataCache(Instances origData) throws Exception {
    classIndex = origData.classIndex();
    numAttributes = origData.numAttributes();
    numClasses = origData.numClasses();
    numInstances = origData.numInstances();

    // attNumVals[i] == 0 marks a numeric attribute; otherwise it holds the
    // number of nominal values. Any other attribute type is rejected.
    attNumVals = new int[origData.numAttributes()];
    for (int i = 0; i < attNumVals.length; i++) {
        if (origData.attribute(i).isNumeric()) {
            attNumVals[i] = 0;
        } else if (origData.attribute(i).isNominal()) {
            attNumVals[i] = origData.attribute(i).numValues();
        } else {
            throw new Exception("Only numeric and nominal attributes are supported.");
        }
    }

    /* Array is indexed by attribute first, to speed access in RF splitting. */
    vals = new float[numAttributes][numInstances];
    for (int a = 0; a < numAttributes; a++) {
        for (int i = 0; i < numInstances; i++) {
            if (origData.instance(i).isMissing(a)) {
                vals[a][i] = Float.MAX_VALUE; // missing values sort to the end
            } else {
                vals[a][i] = (float) origData.instance(i).value(a); // deep copy
            }
        }
    }

    instWeights = new double[numInstances];
    instClassValues = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
        instWeights[i] = origData.instance(i).weight();
        instClassValues[i] = (int) origData.instance(i).classValue();
    }

    /* Precompute sorted instance indices per attribute for the whole dataset.
       As of FastRF 0.99 nominal attributes are sorted exactly like numeric
       ones (missing values coded as Float.MAX_VALUE go to the end), so both
       kinds share one code path. The original had two identical branches plus
       a redundant throwaway int[] allocation and a block of dead commented-out
       code, all removed here. */
    sortedIndices = new int[numAttributes][];
    for (int a = 0; a < numAttributes; a++) {
        if (a == classIndex) {
            continue; // the class column is never split on
        }
        sortedIndices[a] = FastRfUtils.sort(vals[a]);
    }
}
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/**
 * Builds an empty SAMOA {@code Instances} object mirroring the header of the
 * given Weka dataset: same attributes (converted one by one), same relation
 * name, and the same class index.
 *
 * @param instances the Weka dataset whose header is converted
 * @return a SAMOA dataset with identical structure and zero instances
 */
public Instances samoaInstancesInformation(weka.core.Instances instances) {
    final int attributeCount = instances.numAttributes();
    List<Attribute> attributeList = new ArrayList<Attribute>(attributeCount);
    for (int index = 0; index < attributeCount; index++) {
        attributeList.add(samoaAttribute(index, instances.attribute(index)));
    }
    Instances converted = new Instances(instances.relationName(), attributeList, 0);
    converted.setClassIndex(instances.classIndex());
    return converted;
}
From source file:com.yimei.core.Discretizer.java
public static Instances discretize(Instances data) { Instances discData = new Instances(data); for (int i = 0; i < data.numAttributes(); i++) { if (data.attribute(i).isNumeric()) { double max = Double.MIN_VALUE; double min = Double.MAX_VALUE; for (int j = 0; j < data.size(); j++) { double value = data.instance(j).value(i); if (value > max) { max = value;// www . j a v a 2s.c om } if (value < min) { min = value; } } double interval = (max - min) / numOfIntervals; for (int j = 0; j < data.size(); j++) { long discValue = Math.round((data.instance(j).value(i) - min) / interval); discData.instance(j).setValue(i, discValue); } } } return discData; }
From source file:com.zazhu.BlueHub.BlueHub.java
License:Apache License
/** * receives the last reads from the sensors and creates the features we use * only the acc x,y,z (either from internal or external sensor) * /*from w ww . ja v a2s.co m*/ * @param sensorQueue * @throws Exception */ private Instance processingSenseData(Queue<String> sensorQueue, char whatSensor) throws Exception { BufferedReader reader; Instances format; Instance newInstance = null; Log.d(TAG, "Queue size = " + mQueueSize); if (sensorQueue.size() <= 0) throw new Exception("Queue empty"); // create the arrays that will contain the accelerometer data // s.x s.y s.z double[] sx = new double[sensorQueue.size()]; double[] sy = new double[sensorQueue.size()]; double[] sz = new double[sensorQueue.size()]; String rawReading; StringTokenizer st; int index; if (D) Log.e(TAG, "+++ COMPUTING FEATURES +++"); // 1. collect raw data. what kind of sensing data? external vs. internal switch (whatSensor) { case EXTERNAL: index = 0; while ((rawReading = sensorQueue.poll()) != null) { // FORMAT: // "Time_SensorName_SensorNumber_Counter_Xacc_Yacc_Zacc_Xgyro_Ygyro_checksum" // position of the values needed: s.x = 4, s.y = 5, s.z = 6 st = new StringTokenizer(rawReading, FIELD_SEP); // not needed data for (int i = 0; i < 4; i++) st.nextToken(); // s.x, s.y, s.z sx[index] = Double.valueOf(st.nextToken()); sy[index] = Double.valueOf(st.nextToken()); sz[index] = Double.valueOf(st.nextToken()); index += 1; } // 2. 
process raw data // 2.1 read the input format for the instance (TODO must be changed to // use weka classes) reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_extern))); try { format = new Instances(reader); if (format.classIndex() == -1) format.setClassIndex(format.numAttributes() - 1); // 2.2 create a new instance newInstance = new DenseInstance(7); newInstance.setDataset(format); // set attributes newInstance.setValue(format.attribute(0), Feature.getStd(sx)); newInstance.setValue(format.attribute(1), Feature.getStd(sy)); newInstance.setValue(format.attribute(2), Feature.getStd(sz)); newInstance.setValue(format.attribute(3), Feature.getMean(sx)); newInstance.setValue(format.attribute(4), Feature.getMean(sy)); newInstance.setValue(format.attribute(5), Feature.getMean(sz)); // set unknown class newInstance.setMissing(format.attribute(6)); } catch (IOException e) { e.printStackTrace(); } break; case INTERNAL: index = 0; while ((rawReading = sensorQueue.poll()) != null) { // FORMAT "Xacc_Yacc_Zacc" // position of the values needed: s.x = 0, s.y = 1, s.z = 2 st = new StringTokenizer(rawReading, FIELD_SEP); // s.x, s.y, s.z sx[index] = Double.valueOf(st.nextToken()); sy[index] = Double.valueOf(st.nextToken()); sz[index] = Double.valueOf(st.nextToken()); index += 1; } // 2. 
process raw data // 2.1 read the input format for the instance (TODO must be changed to // use weka classes) reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_intern))); try { format = new Instances(reader); if (format.classIndex() == -1) format.setClassIndex(format.numAttributes() - 1); // 2.2 create a new instance newInstance = new DenseInstance(7); newInstance.setDataset(format); // set attributes newInstance.setValue(format.attribute(0), Feature.getStd(sx)); newInstance.setValue(format.attribute(1), Feature.getStd(sy)); newInstance.setValue(format.attribute(2), Feature.getStd(sz)); newInstance.setValue(format.attribute(3), Feature.getMean(sx)); newInstance.setValue(format.attribute(4), Feature.getMean(sy)); newInstance.setValue(format.attribute(5), Feature.getMean(sz)); // set unknown class newInstance.setMissing(format.attribute(6)); } catch (IOException e) { e.printStackTrace(); } break; default: if (D) Log.e(TAG, "+++ COMPUTING FEATURES: NO VALUE FOR THE SENSOR READING +++"); break; } return newInstance; }
From source file:com.zooclassifier.Model.FileLoader.java
/**
 * Loads an ARFF dataset from {@code filename}, treating the last attribute as
 * the label: fills {@code attributes} (per-instance attribute strings),
 * {@code labels}, and the legal-value tables for both.
 *
 * @param filename path to the ARFF file
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if reading the ARFF data fails
 */
public FileLoader(String filename) throws FileNotFoundException, IOException {
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
        ArffLoader.ArffReader arff = new ArffLoader.ArffReader(reader);
        Instances data = arff.getData();
        data.setClassIndex(data.numAttributes() - 1);

        // Per-instance string values: all attributes except the class, plus
        // the class value itself in labels[].
        attributes = new String[data.numInstances()][data.numAttributes() - 1];
        labels = new String[data.numInstances()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance instance = data.instance(i);
            for (int j = 0; j < instance.numAttributes() - 1; j++) {
                attributes[i][j] = instance.stringValue(j);
            }
            labels[i] = instance.stringValue(instance.numAttributes() - 1);
        }

        // Legal (declared) values per attribute and for the label attribute.
        attributesLegalValues = new String[data.numAttributes() - 1][];
        for (int i = 0; i < data.numAttributes() - 1; i++) {
            attributesLegalValues[i] = (String[]) Collections.list(data.attribute(i).enumerateValues())
                    .toArray(new String[data.attribute(i).numValues()]);
        }
        labelsLegalValues = (String[]) Collections
                .list(data.attribute(data.numAttributes() - 1).enumerateValues())
                .toArray(new String[data.attribute(data.numAttributes() - 1).numValues()]);
    } finally {
        reader.close(); // BUGFIX: the original never closed the file reader
    }
}
From source file:Controller.CtlDataMining.java
public String definirEncabezado(Instances data) { /*Se define el encabezado del mensaje, teniendo en cuanta el atributo clase*/ String descripcion = "<b>El atributo clase seleccionado es " + data.attribute(data.numAttributes() - 1).name() + "</b>"; descripcion += " <b>con posibles valores:</b> "; /*Se recorren los posibles valores del atributo clase*/ for (int z = 0; z < data.attribute(data.numAttributes() - 1).numValues(); z++) { descripcion += "<b>" + data.attribute(data.numAttributes() - 1).value(z) + "</b> "; }//from w w w . ja va 2 s . com return descripcion; }