List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
From source file:org.openml.webapplication.fantail.dc.DCUntils.java
License:Open Source License
/**
 * Partitions a dataset into one subset per value of the given attribute.
 *
 * @param data the instances to partition
 * @param att the attribute whose value (cast to {@code int}) selects the target subset
 * @return an array of {@code att.numValues()} subsets; entry {@code k} receives every
 *     instance whose value for {@code att} truncates to {@code k}
 */
private static Instances[] splitData(Instances data, Attribute att) {
    final int bucketCount = att.numValues();
    final Instances[] buckets = new Instances[bucketCount];
    int k = 0;
    while (k < bucketCount) {
        // Each subset is created from the source dataset with room for all of its instances.
        buckets[k] = new Instances(data, data.numInstances());
        k++;
    }

    for (Enumeration<?> e = data.enumerateInstances(); e.hasMoreElements();) {
        Instance current = (Instance) e.nextElement();
        int target = (int) current.value(att);
        buckets[target].add(current);
    }

    for (int i = 0; i < buckets.length; i++) {
        buckets[i].compactify();
    }
    return buckets;
}
From source file:org.openml.webapplication.fantail.dc.DCUntils.java
License:Open Source License
private static double computeEntropy(Instances data) { double[] classCounts = new double[data.numClasses()]; Enumeration<?> instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }//from ww w . j a va2 s. c om double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); }
From source file:org.opentox.jaqpot3.qsar.InstancesUtil.java
License:Open Source License
/** * Accepts /* w w w .ja v a 2s . co m*/ * @param features * @param data * @param compoundURIposition * Position where the compound URI should be placed. If set to <code>-1</code> * the compound URI will not be included in the created dataset. * @return * A subset of the provided dataset (parameter data in this method) with the * features specified in the provided list with that exact order. The compound * URI feature (string) is placed in the position specified by the parameter * compoundURIposition. * @throws JaqpotException * A JaqpotException is thrown with error code {@link ErrorCause#FeatureNotInDataset FeatureNotInDataset} * in case you provide a feature that is not found in the sumbitted Instances. */ public static Instances sortByFeatureAttrList(List<String> features, final Instances data, int compoundURIposition) throws JaqpotException { int position = compoundURIposition > features.size() ? features.size() : compoundURIposition; if (compoundURIposition != -1) { features.add(position, "compound_uri"); } FastVector vector = new FastVector(features.size()); for (int i = 0; i < features.size(); i++) { String feature = features.get(i); Attribute attribute = data.attribute(feature); if (attribute == null) { throw new JaqpotException("The Dataset you provided does not contain feature:" + feature); } vector.addElement(attribute.copy()); } Instances result = new Instances(data.relationName(), vector, 0); Enumeration instances = data.enumerateInstances(); while (instances.hasMoreElements()) { Instance instance = (Instance) instances.nextElement(); double[] vals = new double[features.size()]; for (int i = 0; i < features.size(); i++) { vals[i] = instance.value(data.attribute(result.attribute(i).name())); } Instance in = new Instance(1.0, vals); result.add(in); } return result; }
From source file:org.opentox.jaqpot3.qsar.InstancesUtil.java
License:Open Source License
/**
 * Builds a new dataset whose attributes are the given features (identified by their
 * URIs, in list order), followed by the attributes of {@code data} found at the
 * indices listed in {@code trFieldsAttrIndex}.
 *
 * @param list
 *      Features to keep; each feature's URI string is used as the attribute name.
 * @param trFieldsAttrIndex
 *      Indices (into {@code data}) of additional attributes appended after the features.
 * @param data
 *      The dataset the values are taken from; it is not modified.
 * @param compoundURIposition
 *      Position of the <code>compound_uri</code> attribute among the features. Values
 *      larger than the number of features are clamped to that number. If set to
 *      <code>-1</code> the compound URI is not included.
 * @return
 *      A new dataset with the selected attributes in the described order. Every
 *      created instance has weight 1.0.
 * @throws JaqpotException
 *      If a requested feature or indexed attribute cannot be resolved in {@code data}.
 */
public static Instances sortForPMMLModel(List<Feature> list, List<Integer> trFieldsAttrIndex,
        final Instances data, int compoundURIposition) throws JaqpotException {
    List<String> features = new ArrayList<String>();
    for (Feature feature : list) {
        features.add(feature.getUri().toString());
    }
    if (compoundURIposition != -1) {
        int position = Math.min(compoundURIposition, features.size());
        features.add(position, "compound_uri");
    }

    FastVector vector = new FastVector(features.size());
    for (String feature : features) {
        Attribute attribute = data.attribute(feature);
        if (attribute == null) {
            throw new JaqpotException("The Dataset you provided does not contain feature:" + feature);
        }
        vector.addElement(attribute.copy());
    }

    for (Integer index : trFieldsAttrIndex) {
        Attribute attribute = data.attribute(index.intValue());
        if (attribute == null) {
            throw new JaqpotException("The Dataset you provided does not contain this pmml feature");
        }
        vector.addElement(attribute.copy());
    }
    final int attributeSize = features.size() + trFieldsAttrIndex.size();

    Instances result = new Instances(data.relationName(), vector, 0);

    // Resolve the source attribute of every output column once (by name, as before)
    // instead of repeating the name lookup for every cell in the copy loop.
    Attribute[] sourceAttributes = new Attribute[attributeSize];
    for (int i = 0; i < attributeSize; i++) {
        sourceAttributes[i] = data.attribute(result.attribute(i).name());
    }

    Enumeration<?> instances = data.enumerateInstances();
    while (instances.hasMoreElements()) {
        Instance instance = (Instance) instances.nextElement();
        double[] vals = new double[attributeSize];
        for (int i = 0; i < attributeSize; i++) {
            vals[i] = instance.value(sourceAttributes[i]);
        }
        result.add(new Instance(1.0, vals));
    }
    return result;
}
From source file:org.opentox.toxotis.factory.DatasetFactory.java
License:Open Source License
/** * Create a dataset using a <code>weka.core.Instances</code> object (based on * Weka, version 3.6.2). Since datasets structurally differ from Instances * object for they store the information in a more expanded way including meta * data and nodes that do not appear in Instances object (or ARFF files), the * provided object has to possess a certain structure: The first attribute of * it has to be always named <code>compound_uri</code> and be of type <code>string</code>. * This attribute stores the URIs of the compounds of the dataset. Second, the rest * attributes have to be of type <code>string</code> or <code>numeric</code> or * <code>nominal</code> and their name should be an acceptable feature URI (for * example <code>http://someserver.com:1234/opentox/feature/54234</code>). * * @param instances/*from w w w . j ava2s .c o m*/ * Instances object to be converted into a Dataset. * @return * The dataset that is created from the provided Instances object. * @throws ToxOtisException * In case the conversion is not possible due to structural inconsistencies * of the provided Instances object. */ public Dataset createFromArff(Instances instances) throws ToxOtisException { if (instances.attribute("compound_uri") == null && instances.attribute("URI") == null) { throw new ToxOtisException("Cannot create an OpenTox dataset out of this dataset because " + "'compound_uri' was not found in it's attribute list"); } Dataset ds = new Dataset(); Enumeration instancesEnum = instances.enumerateInstances(); while (instancesEnum.hasMoreElements()) { Instance instance = (Instance) instancesEnum.nextElement(); ds.getDataEntries().add(createDataEntry(instance)); } try { ds.setUri(new VRI(instances.relationName())); } catch (URISyntaxException ex) { throw new ToxOtisException( "The relation name '" + instances.relationName() + "' is not" + "a valid dataset URI!", ex); } return ds; }
From source file:org.wkwk.classifier.MyC45.java
public double computeEntropy(Instances data) { // Hitung kemunculan kelas double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }/*from ww w . j a v a2 s.co m*/ // Hitung entropy double entropy = 0; for (int i = 0; i < data.numClasses(); i++) { if (classCounts[i] > 0) { entropy -= classCounts[i] / data.numInstances() * Utils.log2(classCounts[i] / data.numInstances()); } } return entropy; }
From source file:org.wkwk.classifier.MyC45.java
public Instances[] splitData(Instances data, Attribute attr) { Instances[] splitData = new Instances[attr.numValues()]; for (int i = 0; i < attr.numValues(); i++) { splitData[i] = new Instances(data, data.numInstances()); }//from ww w . j a va2 s . c om Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); splitData[(int) inst.value(attr)].add(inst); } return splitData; }
From source file:org.wkwk.classifier.MyC45.java
public double bestThreshold(Instances data, Attribute attr) { data.sort(attr);//from w w w . j a v a2 s. c o m double m_ig = 0; double bestThr = 0; double classTemp = data.get(0).classValue(); double valueTemp = data.get(0).value(attr); Enumeration instEnum = data.enumerateInstances(); double dt; while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (classTemp != inst.classValue()) { classTemp = inst.classValue(); dt = valueTemp; valueTemp = inst.value(attr); double threshold = dt + ((valueTemp - dt) / 2); double igTemp = computeInfoGainCont(data, attr, threshold); if (m_ig < igTemp) { m_ig = igTemp; bestThr = threshold; } } } return bestThr; }
From source file:probcog.bayesnets.learning.CPTLearner.java
License:Open Source License
/** * learns all the examples in the instances. Each instance in the instances represents one example. * All the random variables (nodes) in the network * need to be found in each instance as columns that are named accordingly, i.e. for each * random variable, there must be an attribute with a matching name in the instance. * @param instances the instances * @throws Exception if the result set is empty * @throws SQLException particularly if there is no matching column for one of the node names *//* ww w. ja va 2s . c o m*/ public void learn(Instances instances) throws Exception { if (!initialized) init(); // if it's an empty result set, throw exception if (instances.numInstances() == 0) throw new Exception("empty result set!"); BeliefNode[] nodes = bn.bn.getNodes(); int numAttributes = instances.numAttributes(); // Now we can get much more nodes than attributes // if(numAttributes != nodes.length) // throw new Exception("Result does not contain suitable data (attribute count = " + numAttributes + "; node count = " + nodes.length + ")"); // map node indices to attribute index int[] nodeIdx2colIdx = new int[nodes.length]; Arrays.fill(nodeIdx2colIdx, -1); for (int i = 0; i < numAttributes; i++) { Set<String> nodeNames = bn.getNodeNamesForAttribute(instances.attribute(i).name()); //logger.debug("Nodes for attribute "+instances.attribute(i).name()+": "+nodeNames); if (nodeNames == null) continue; for (String nodeName : nodeNames) { int node_idx = bn.getNodeIndex(nodeName); if (node_idx == -1) throw new Exception("Unknown node referenced in result set: " + instances.attribute(i).name()); nodeIdx2colIdx[node_idx] = i; } } // gather data, iterating over the result set int[] domainIndices = new int[nodes.length]; @SuppressWarnings("unchecked") Enumeration<Instance> instanceEnum = instances.enumerateInstances(); while (instanceEnum.hasMoreElements()) { Instance instance = instanceEnum.nextElement(); // for each row... 
// - get the indices into the domains of each node // that correspond to the current row of data // (sorted in the same order as the nodes are ordered // in the BeliefNetwork) for (int node_idx = 0; node_idx < nodes.length; node_idx++) { int domain_idx; if (clusterers[node_idx] == null) { Discrete domain = (Discrete) nodes[node_idx].getDomain(); String strValue; if (domain instanceof Discretized) { // If we have a discretized domain we discretize first... int colIdx = nodeIdx2colIdx[node_idx]; if (colIdx < 0) { //bn.dump(); /* for (int i = 0; i < numAttributes; i++) { logger.debug("Attribute "+i+": "+instances.attribute(i).name()); } StringBuffer sb = new StringBuffer(); for (int i = 0; i < nodeIdx2colIdx.length; i++) { sb.append(i+"\t"); } sb.append("\n"); for (int i = 0; i < nodeIdx2colIdx.length; i++) { sb.append(nodeIdx2colIdx[i]+"\t"); } logger.debug(sb); */ throw new Exception( "No attribute specified for " + bn.bn.getNodes()[node_idx].getName()); } double value = instance.value(colIdx); strValue = (((Discretized) domain).getNameFromContinuous(value)); /*if (domain.findName(strValue) == -1) { logger.debug(domain); logger.debug(strValue); }*/ } else { int colIdx = nodeIdx2colIdx[node_idx]; if (colIdx < 0) { throw new Exception( "No attribute specified for " + bn.bn.getNodes()[node_idx].getName()); } strValue = instance.stringValue(nodeIdx2colIdx[node_idx]); } domain_idx = domain.findName(strValue); if (domain_idx == -1) { /*String[] myDomain = bn.getDiscreteDomainAsArray(bn.bn.getNodes()[node_idx].getName()); for (int i=0; i<myDomain.length; i++) { logger.debug(myDomain[i]); }*/ throw new Exception(strValue + " not found in domain of " + nodes[node_idx].getName()); } } else { Instance inst = new Instance(1); inst.setValue(0, instance.value(nodeIdx2colIdx[node_idx])); domain_idx = clusterers[node_idx].clusterInstance(inst); } domainIndices[node_idx] = domain_idx; } // - update each node's CPT for (int i = 0; i < nodes.length; i++) { 
counters[i].count(domainIndices); } } }
From source file:probcog.bayesnets.learning.DomainLearner.java
License:Open Source License
/** * learns all the examples in the result set. Each row in the result set * represents one example. All the random variables (nodes) that have been * scheduled for learning in the constructor need to be found in each result * row as columns that are named accordingly, i.e. for each random variable * for which the domain is to be learnt, there must be a column with a * matching name in the result set./* w ww . j av a 2s . c o m*/ * * @param rs * the result set * @throws Exception * if the result set is empty * @throws SQLException * particularly if there is no matching column for one of the * node names */ public void learn(Instances instances) throws Exception, SQLException { // if it's an empty result set, throw exception if (instances.numInstances() == 0) throw new Exception("empty result set!"); // gather domain data int numDirectDomains = directDomains != null ? directDomains.length : 0; int numClusteredDomains = clusteredDomains != null ? clusteredDomains.length : 0; @SuppressWarnings("unchecked") Enumeration<Instance> instanceEnum = instances.enumerateInstances(); while (instanceEnum.hasMoreElements()) { Instance instance = instanceEnum.nextElement(); // for direct learning, add outcomes to the set of outcomes for (int i = 0; i < numDirectDomains; i++) { directDomainData.get(i).add(instance.stringValue(instances.attribute(directDomains[i].getName()))); } // for clustering, gather all instances for (int i = 0; i < numClusteredDomains; i++) { Instance inst = new Instance(1); inst.setValue(attrValue, instance.value(instances.attribute(clusteredDomains[i].nodeName))); clusterData[i].add(inst); } } }