List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
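A quick, self-contained sketch of the lookup itself before the per-project examples (the dataset and attribute names here are made up for illustration; in recent Weka releases attribute(String) returns the matching Attribute, or null when no attribute has that name):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;

public class AttributeLookupExample {
    public static void main(String[] args) {
        // Build a tiny header with two numeric attributes.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length"));
        atts.add(new Attribute("width"));
        Instances header = new Instances("demo", atts, 0);

        // Look an attribute up by name; null means "no such attribute".
        Attribute att = header.attribute("width");
        if (att != null) {
            System.out.println(att.name() + " has index " + att.index());
        }
    }
}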
From source file: moa.core.VectorDistances.java
License: Apache License
/**
 * Coefficient of Divergence (Legendre and Legendre, 1983).
 * Related to the Canberra distance.
 * @param src first data point to compare from
 * @param dst second data point to compare to
 * @param header feature weight (strictly speaking, all weights should be 1 for pure Minkowski)
 * @return distance
 */
public static synchronized double distanceDivergence(double[] src, double[] dst, Instances header) {
    double ret = 0.0;
    int minSize = Math.min(src.length, Math.min(dst.length, header.numAttributes()));
    if (minSize < 1) {
        return Double.MAX_VALUE;
    }
    for (int i = 0; i < minSize; i++) {
        if (Math.abs(src[i] + dst[i]) <= 0) continue;
        double d = Math.abs((src[i] - dst[i]) / (src[i] + dst[i]));
        ret += d * d * header.attribute(i).weight();
    }
    ret = Math.sqrt(ret / minSize);
    // Safety...
    if (Double.isInfinite(ret)) {
        ret = Double.MAX_VALUE;
    } else if (Double.isNaN(ret)) {
        ret = 0.0;
    }
    return ret;
}
From source file: moa.core.VectorDistances.java
License: Apache License
/**
 * Bray-Curtis distance
 * @param src first data point to compare from
 * @param dst second data point to compare to
 * @param header feature weight and meta-data
 * @return distance
 */
public static synchronized double distanceBray(double[] src, double[] dst, Instances header) {
    double ret = 0.0;
    int minSize = Math.min(src.length, Math.min(dst.length, header.numAttributes()));
    if (minSize < 1) {
        return Double.MAX_VALUE;
    }
    double numerator = 0;
    double denominator = 0;
    for (int i = 0; i < minSize; i++) {
        numerator += header.attribute(i).weight() * Math.abs(src[i] - dst[i]);
        denominator += header.attribute(i).weight() * Math.abs(src[i] + dst[i]);
    }
    ret += (denominator != 0.0) ? numerator / denominator : Double.MAX_VALUE;
    // Safety...
    if (Double.isInfinite(ret)) {
        ret = Double.MAX_VALUE;
    } else if (Double.isNaN(ret)) {
        ret = 0.0;
    }
    return ret;
}
From source file: moa.core.VectorDistances.java
License: Apache License
/**
 * Chord distance (Orloci, 1967)
 * @param src first data point to compare from
 * @param dst second data point to compare to
 * @param header feature weight (strictly speaking, all weights should be 1 for pure Minkowski)
 * @return distance of the chord joining two normalized points within a hypersphere of radius 1
 */
public static synchronized double distanceChord(double[] src, double[] dst, Instances header) {
    double ret = 0.0;
    int minSize = Math.min(src.length, Math.min(dst.length, header.numAttributes()));
    if (minSize < 1) {
        return Double.MAX_VALUE;
    }
    double srcL2Norm = 0.0;
    double dstL2Norm = 0.0;
    for (int i = 0; i < src.length; i++) {
        srcL2Norm += src[i] * src[i];
    }
    srcL2Norm = Math.sqrt(srcL2Norm);
    for (int i = 0; i < dst.length; i++) {
        dstL2Norm += dst[i] * dst[i];
    }
    dstL2Norm = Math.sqrt(dstL2Norm);
    for (int i = 0; i < minSize; i++) {
        ret += src[i] * dst[i] * header.attribute(i).weight();
    }
    ret = Math.abs(2.0 - 2 * (ret / (srcL2Norm * dstL2Norm)));
    ret = Math.sqrt(ret);
    // Safety...
    if (Double.isInfinite(ret)) {
        ret = Double.MAX_VALUE;
    } else if (Double.isNaN(ret)) {
        ret = 0.0;
    }
    return ret;
}
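A hedged call-site sketch for the three static helpers above (distanceDivergence, distanceBray, distanceChord). This is a minimal sketch, not code from the project: the two-feature header and the sample vectors are invented, and it assumes moa.core.VectorDistances is on the classpath; the per-feature weights come from Attribute.weight(), which defaults to 1.0 in recent Weka releases.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;
import moa.core.VectorDistances;

public class DistanceExample {
    public static void main(String[] args) {
        // Header whose attribute weights act as the feature weights.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("f1"));
        atts.add(new Attribute("f2"));
        Instances header = new Instances("weights", atts, 0);

        double[] a = {1.0, 2.0};
        double[] b = {2.0, 4.0};
        System.out.println("Bray:  " + VectorDistances.distanceBray(a, b, header));
        System.out.println("Chord: " + VectorDistances.distanceChord(a, b, header));
    }
}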
From source file: moa.streams.ConceptDriftRealStream.java
License: Open Source License
@Override
public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
    this.inputStream = (InstanceStream) getPreparedClassOption(this.streamOption);
    this.driftStream = (InstanceStream) getPreparedClassOption(this.driftstreamOption);
    this.random = new Random(this.randomSeedOption.getValue());
    numberInstanceStream = 0;
    if (this.alphaOption.getValue() != 0.0) {
        this.widthOption.setValue((int) (1 / Math.tan(this.alphaOption.getValue() * Math.PI / 180)));
    }
    // generate header
    Instances first = this.inputStream.getHeader();
    Instances second = this.driftStream.getHeader();
    FastVector newAttributes = new FastVector();
    for (int i = 0; i < first.numAttributes() - 1; i++) {
        newAttributes.addElement(first.attribute(i));
    }
    for (int i = 0; i < second.numAttributes() - 1; i++) {
        newAttributes.addElement(second.attribute(i));
    }
    Attribute classLabels;
    if (first.numClasses() < second.numClasses()) {
        classLabels = second.classAttribute();
    } else {
        classLabels = first.classAttribute();
    }
    newAttributes.addElement(classLabels);
    this.streamHeader = new InstancesHeader(
            new Instances(getCLICreationString(InstanceStream.class), newAttributes, 0));
    this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
    restart();
}
From source file: moa.tud.ke.patching.AdaptivePatchingAdwin.java
/**
 * Copies the class attribute to another position (typically the first position).
 *
 * @param instances the dataset whose class attribute is copied
 * @param newName the name for the copied attribute
 * @param newAttributeIndex the position at which the copy is inserted
 * @return the filtered dataset containing the new attribute
 * @throws Exception if the Add filter cannot be applied
 */
public static Instances copyClassAttribute(Instances instances, String newName, int newAttributeIndex)
        throws Exception {
    int whichAttribute = instances.classIndex();
    Add filter = new Add();
    filter.setAttributeIndex("" + newAttributeIndex);
    filter.setAttributeName(newName);
    // Copy nominal attribute labels
    if (instances.attribute(whichAttribute).isNominal()) {
        String newNominalLabels = "";
        Boolean first = true;
        Enumeration<Object> o = instances.attribute(whichAttribute).enumerateValues();
        while (o.hasMoreElements()) {
            String s = (String) o.nextElement();
            if (!first) {
                newNominalLabels += ",";
            }
            newNominalLabels += s;
            first = false;
        }
        filter.setNominalLabels(newNominalLabels);
    }
    filter.setInputFormat(instances);
    instances = Filter.useFilter(instances, filter);
    return instances;
}
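A hedged call-site sketch (the data variable is hypothetical). Two details worth noting: the Add filter treats the index string as a 1-based position, and the copy carries only the labels of the class attribute; its values start out missing and must be filled in by the caller.

// Assuming 'data' is a weka.core.Instances object with a nominal class set.
Instances withCopy = AdaptivePatchingAdwin.copyClassAttribute(data, "classCopy", 1);
System.out.println(withCopy.attribute(0).name()); // "classCopy", same nominal labels as the class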
From source file: model.clasification.ModelFinder.java
public static void writeIntoTxt(String modelName) throws IOException {
    Instances data = Data.getInstance().getInstances();
    PrintWriter bw = new PrintWriter(
            new BufferedWriter(new FileWriter("methods/classification/TxtFiles/" + modelName + ".txt")));
    // BufferedWriter bw = new BufferedWriter(new FileWriter("methods/classification/TxtFiles/" + modelName + ".txt"));
    // brojAtr = number of attributes excluding the class;
    // prviR/drugiR mark the first and last single quote in the attribute's string form
    int brojAtr = data.numAttributes() - 1;
    for (int i = 0; i < brojAtr; i++) {
        int prviR = data.attribute(i).toString().indexOf("'");
        int drugiR = data.attribute(i).toString().lastIndexOf("'");
        bw.write(data.attribute(i).toString().substring(prviR + 1, drugiR) + ", ");
    }
    bw.close();
}
From source file: mulan.classifier.meta.ConstrainedKMeans.java
License: Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer
 * that are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    for (int i = 0; i < m_NumClusters; i++) {
        bucket[i] = new ArrayList<bucketInstance>();
    }
    // calculate bucket size
    bucketSize = (int) Math.ceil(data.numInstances() / (double) m_NumClusters);
    //System.out.print("bucketSize = " + bucketSize + "\n");

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    m_ReplaceMissingFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_ReplaceMissingFilter);

    m_Min = new double[instances.numAttributes()];
    m_Max = new double[instances.numAttributes()];
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_Min[i] = m_Max[i] = Double.NaN;
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];
    for (int i = 0; i < instances.numInstances(); i++) {
        updateMinMax(instances.instance(i));
    }

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;
    for (int j = instances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(instances.instance(instIndex), instances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(instances.instance(instIndex));
            initC.put(hk, null);
        }
        instances.swap(j, instIndex);
        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }
    m_NumClusters = m_ClusterCentroids.numInstances();

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    while (!converged) {
        // reset buckets
        for (int j = 0; j < m_NumClusters; j++) {
            bucket[j] = new ArrayList<bucketInstance>();
        }
        emptyClusterCount = 0;
        m_Iterations++;
        //System.out.println(">>Iterations: " + m_Iterations);
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            //System.out.println("processing instance: " + i);
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }
        if (m_Iterations > maxIterations) {
            converged = true;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            double[] vals = new double[instances.numAttributes()];
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                for (int j = 0; j < instances.numAttributes(); j++) {
                    vals[j] = tempI[i].meanOrMode(j);
                    m_ClusterNominalCounts[i][j] = tempI[i].attributeStats(j).nominalCounts;
                }
                m_ClusterCentroids.add(new DenseInstance(1.0, vals));
            }
            //System.out.println("centroid: " + i + " " + m_ClusterCentroids.instance(i).toString());
        }
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            tempI = new Instances[m_NumClusters];
        }
        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // reset buckets
    for (int j = 0; j < m_NumClusters; j++) {
        bucket[j] = new ArrayList<bucketInstance>();
    }
    m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        double[] vals2 = new double[instances.numAttributes()];
        for (int j = 0; j < instances.numAttributes(); j++) {
            if (instances.attribute(j).isNumeric()) {
                vals2[j] = Math.sqrt(tempI[i].variance(j));
            } else {
                vals2[j] = Utils.missingValue();
            }
        }
        m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}
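A hedged sketch of driving this clusterer from outside (a minimal sketch: the ARFF path is invented, and setNumClusters is an assumed SimpleKMeans-style setter; only buildClusterer(Instances) is confirmed by the snippet above):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import mulan.classifier.meta.ConstrainedKMeans;

public class ConstrainedKMeansExample {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("features.arff"); // hypothetical path
        ConstrainedKMeans ckm = new ConstrainedKMeans();
        ckm.setNumClusters(4); // assumed setter, mirroring SimpleKMeans
        ckm.buildClusterer(data);
    }
}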
From source file: mulan.classifier.meta.HierarchyBuilder.java
License: Open Source License
/**
 * Creates the hierarchical dataset according to the original multilabel
 * instances object and the constructed label hierarchy
 *
 * @param mlData the original multilabel instances
 * @param metaData the metadata of the constructed label hierarchy
 * @return the produced dataset
 * @throws InvalidDataFormatException
 */
public static MultiLabelInstances createHierarchicalDataset(MultiLabelInstances mlData, LabelsMetaData metaData)
        throws InvalidDataFormatException {
    Set<String> leafLabels = mlData.getLabelsMetaData().getLabelNames();
    Set<String> metaLabels = metaData.getLabelNames();
    for (String string : leafLabels) {
        metaLabels.remove(string);
    }
    Instances dataSet = mlData.getDataSet();
    int numMetaLabels = metaLabels.size();

    // copy existing attributes
    ArrayList<Attribute> atts = new ArrayList<Attribute>(dataSet.numAttributes() + numMetaLabels);
    for (int i = 0; i < dataSet.numAttributes(); i++) {
        atts.add(dataSet.attribute(i));
    }
    ArrayList<String> labelValues = new ArrayList<String>();
    labelValues.add("0");
    labelValues.add("1");
    // add metalabel attributes
    for (String metaLabel : metaLabels) {
        atts.add(new Attribute(metaLabel, labelValues));
    }

    // initialize dataset
    Instances newDataSet = new Instances("hierarchical", atts, dataSet.numInstances());

    // copy features and labels, set metalabels
    for (int i = 0; i < dataSet.numInstances(); i++) {
        //System.out.println("Constructing instance " + (i+1) + "/" + dataSet.numInstances());
        // initialize new values
        double[] newValues = new double[newDataSet.numAttributes()];
        Arrays.fill(newValues, 0);

        // copy features and labels
        double[] values = dataSet.instance(i).toDoubleArray();
        System.arraycopy(values, 0, newValues, 0, values.length);

        // set metalabels
        for (String label : leafLabels) {
            Attribute att = dataSet.attribute(label);
            if (att.value((int) dataSet.instance(i).value(att)).equals("1")) {
                LabelNode currentNode = metaData.getLabelNode(label);
                // put 1 all the way up to the root, unless you see a 1, in which case stop
                while (currentNode.hasParent()) {
                    currentNode = currentNode.getParent();
                    Attribute currentAtt = newDataSet.attribute(currentNode.getName());
                    if (newValues[atts.indexOf(currentAtt)] == 1) { // no need to go more up
                        break;
                    } else { // put 1
                        newValues[atts.indexOf(currentAtt)] = 1;
                    }
                }
            }
        }
        Instance instance = dataSet.instance(i);
        newDataSet.add(DataUtils.createInstance(instance, instance.weight(), newValues));
    }
    return new MultiLabelInstances(newDataSet, metaData);
}
From source file: mulan.classifier.meta.HMC.java
License: Open Source License
private void buildRec(HMCNode node, Instances data) throws InvalidDataFormatException, Exception {
    String metaLabel = node.getName();
    //debug("Preparing node data");
    Set<String> childrenLabels = new HashSet<String>();
    Set<String> currentlyAvailableLabels = new HashSet<String>();
    if (metaLabel.equals("root")) {
        for (LabelNode child : originalMetaData.getRootLabels()) {
            childrenLabels.add(child.getName());
        }
        currentlyAvailableLabels = originalMetaData.getLabelNames();
    } else {
        LabelNode labelNode = originalMetaData.getLabelNode(metaLabel);
        for (LabelNode child : labelNode.getChildren()) {
            childrenLabels.add(child.getName());
        }
        currentlyAvailableLabels = labelNode.getDescendantLabels();
    }

    // delete non-children labels
    Set<String> labelsToDelete = new HashSet(currentlyAvailableLabels);
    labelsToDelete.removeAll(childrenLabels);
    //System.out.println("Children: " + Arrays.toString(childrenLabels.toArray()));
    //System.out.println("Labels to delete: " + Arrays.toString(labelsToDelete.toArray()));
    int[] indicesToDelete = new int[labelsToDelete.size()];
    int counter1 = 0;
    for (String label : labelsToDelete) {
        indicesToDelete[counter1] = data.attribute(label).index();
        counter1++;
    }
    Remove filter1 = new Remove();
    filter1.setAttributeIndicesArray(indicesToDelete);
    filter1.setInputFormat(data);
    Instances nodeInstances = Filter.useFilter(data, filter1);

    // create meta data
    LabelsMetaDataImpl nodeMetaData = new LabelsMetaDataImpl();
    for (String label : childrenLabels) {
        nodeMetaData.addRootNode(new LabelNodeImpl(label));
    }

    // create multi-label instance
    MultiLabelInstances nodeData = new MultiLabelInstances(nodeInstances, nodeMetaData);

    //System.out.println("Building model");
    node.build(nodeData);
    System.out.println("spark #instances:" + nodeInstances.numInstances());
    TotalUsedTrainInsts += nodeInstances.numInstances();
    NoNodes++;

    for (String childLabel : childrenLabels) {
        LabelNode childNode = originalMetaData.getLabelNode(childLabel);
        if (!childNode.hasChildren()) {
            continue;
        }
        //System.out.println("Preparing child data");
        // remove instances where the child label is 0
        int childMetaLabelIndex = data.attribute(childLabel).index();
        Instances childData = new Instances(data);
        for (int i = 0; i < childData.numInstances(); i++) {
            if (childData.instance(i).stringValue(childMetaLabelIndex).equals("0")) {
                childData.delete(i);
                // when deleting an instance from the training set, i must be reduced too
                i--;
            }
        }

        // delete non-descendant labels
        Set<String> descendantLabels = childNode.getDescendantLabels();
        Set<String> labelsToDelete2 = new HashSet(currentlyAvailableLabels);
        labelsToDelete2.removeAll(descendantLabels);
        //System.out.println("Labels to delete: " + Arrays.toString(labelsToDelete2.toArray()));
        int[] indicesToDelete2 = new int[labelsToDelete2.size()];
        int counter2 = 0;
        for (String label : labelsToDelete2) {
            indicesToDelete2[counter2] = childData.attribute(label).index();
            counter2++;
        }
        Remove filter2 = new Remove();
        filter2.setAttributeIndicesArray(indicesToDelete2);
        filter2.setInputFormat(childData);
        childData = Filter.useFilter(childData, filter2);

        MultiLabelLearner mll = baseLearner.makeCopy();
        HMCNode child = new HMCNode(childLabel, mll);
        node.addChild(child);
        buildRec(child, childData);
    }
}
From source file: mulan.classifier.neural.DataPair.java
License: Open Source License
/**
 * Creates a {@link DataPair} representation for each {@link Instance} contained in the
 * {@link MultiLabelInstances} data set. The {@link DataPair} is a lightweight representation
 * of instance values (as double values), which is useful when iterating over the data and its
 * values.
 *
 * @param mlDataSet the {@link MultiLabelInstances} whose content has to be
 *                  converted to a list of {@link DataPair}
 * @param bipolarOutput indicates whether output values should be converted
 *                      to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {
    Instances data = mlDataSet.getDataSet();
    int[] featureIndices = mlDataSet.getFeatureIndices();
    int[] labelIndices = mlDataSet.getLabelIndices();
    int numFeatures = featureIndices.length;
    int numLabels = mlDataSet.getNumLabels();
    int numInstances = data.numInstances();

    List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances);
    for (int index = 0; index < numInstances; index++) {
        Instance instance = data.instance(index);
        double[] input = new double[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            int featureIndex = featureIndices[i];
            Attribute featureAttr = instance.attribute(featureIndex);
            // if the attribute is binary, parse the string value ... it is expected to be '0' or '1'
            if (featureAttr.isNominal() && featureAttr.numValues() == 2) {
                input[i] = Double.parseDouble(instance.stringValue(featureIndex));
            }
            // else:
            // a) the attribute is nominal with multiple values; use indexes, as nominal values
            //    do not have to be numbers in general ... this is a fall-back and should be a rare case
            // b) it is a numeric attribute
            else {
                input[i] = instance.value(featureIndex);
            }
        }

        if (mlDataSet.hasMissingLabels(instance)) {
            continue;
        }

        double[] output = new double[numLabels];
        for (int i = 0; i < numLabels; i++) {
            output[i] = Double.parseDouble(
                    data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i])));
            if (bipolarOutput && output[i] == 0) {
                output[i] = -1;
            }
        }
        dataPairs.add(new DataPair(input, output));
    }
    return dataPairs;
}
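A hedged usage sketch for createDataPairs (the ARFF/XML file names are invented, and getInput()/getOutput() are assumed to be DataPair's accessors):

import java.util.List;
import mulan.classifier.neural.DataPair;
import mulan.data.MultiLabelInstances;

public class DataPairExample {
    public static void main(String[] args) throws Exception {
        // Load a multi-label dataset (file names are hypothetical).
        MultiLabelInstances mlData = new MultiLabelInstances("train.arff", "train.xml");
        // Convert to lightweight pairs with bipolar (-1/1) outputs.
        List<DataPair> pairs = DataPair.createDataPairs(mlData, true);
        for (DataPair pair : pairs) {
            double[] input = pair.getInput();   // assumed accessor
            double[] output = pair.getOutput(); // assumed accessor
            System.out.println(input.length + " features -> " + output.length + " labels");
        }
    }
}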