List of usage examples for weka.core Instance setDataset
public void setDataset(Instances instances);
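Before the per-project examples below, here is a minimal, self-contained sketch of the basic pattern: an Instance holds only raw attribute values, and setDataset is what associates it with a header (an Instances object with zero rows) so that attribute types, indices and the class attribute can be resolved. The attribute names and values are illustrative only, and the sketch assumes Weka 3.7 or later, where DenseInstance replaced the concrete Instance class that several of the older examples below still construct directly.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // Build a header: a dataset that defines the attributes but holds no rows
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(new Attribute("commonness"));
        attributes.add(new Attribute("relatedness"));
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("invalid");
        classValues.add("valid");
        attributes.add(new Attribute("class", classValues));
        Instances header = new Instances("senses", attributes, 0);
        header.setClassIndex(header.numAttributes() - 1);

        // Create an instance from raw values; the class value is left missing
        double[] values = new double[] { 0.8, 0.6, Utils.missingValue() };
        Instance inst = new DenseInstance(1.0, values);

        // Associate the instance with the header. Without this call, methods
        // such as classAttribute() or stringValue(), and passing the instance
        // to a classifier, would fail with an UnassignedDatasetException.
        inst.setDataset(header);

        System.out.println(inst);
    }
}

Once setDataset has been called, printing the instance resolves the values against the header's attribute definitions (the missing class value prints as "?"), and the instance can be handed to a trained classifier built from a compatible header.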
From source file: org.pentaho.di.scoring.WekaScoringData.java
License: Open Source License

/**
 * Helper method that constructs an Instance to input to the Weka model based
 * on incoming Kettle fields and pre-constructed attribute-to-field mapping
 * data.
 *
 * @param inputMeta a <code>RowMetaInterface</code> value
 * @param inputRow an <code>Object</code> value
 * @param mappingIndexes an <code>int[]</code> value
 * @param model a <code>WekaScoringModel</code> value
 * @return an <code>Instance</code> value
 */
private Instance constructInstance(RowMetaInterface inputMeta, Object[] inputRow, int[] mappingIndexes,
        WekaScoringModel model, boolean freshVector) {

    Instances header = model.getHeader();

    // Re-use this array (unless told otherwise) to avoid an object creation
    if (m_vals == null || freshVector) {
        m_vals = new double[header.numAttributes()];
    }

    for (int i = 0; i < header.numAttributes(); i++) {
        if (mappingIndexes[i] >= 0) {
            try {
                Object inputVal = inputRow[mappingIndexes[i]];

                Attribute temp = header.attribute(i);
                ValueMetaInterface tempField = inputMeta.getValueMeta(mappingIndexes[i]);
                int fieldType = tempField.getType();

                // Check for missing value (null or empty string)
                if (tempField.isNull(inputVal)) {
                    m_vals[i] = Utils.missingValue();
                    continue;
                }

                switch (temp.type()) {
                case Attribute.NUMERIC: {
                    if (fieldType == ValueMetaInterface.TYPE_BOOLEAN) {
                        Boolean b = tempField.getBoolean(inputVal);
                        if (b.booleanValue()) {
                            m_vals[i] = 1.0;
                        } else {
                            m_vals[i] = 0.0;
                        }
                    } else if (fieldType == ValueMetaInterface.TYPE_INTEGER) {
                        Long t = tempField.getInteger(inputVal);
                        m_vals[i] = t.longValue();
                    } else {
                        Double n = tempField.getNumber(inputVal);
                        m_vals[i] = n.doubleValue();
                    }
                }
                    break;
                case Attribute.NOMINAL: {
                    String s = tempField.getString(inputVal);
                    // now need to look for this value in the attribute
                    // in order to get the correct index
                    int index = temp.indexOfValue(s);
                    if (index < 0) {
                        // set to missing value
                        m_vals[i] = Utils.missingValue();
                    } else {
                        m_vals[i] = index;
                    }
                }
                    break;
                case Attribute.STRING: {
                    String s = tempField.getString(inputVal);
                    // Set the attribute in the header to contain just this string value
                    temp.setStringValue(s);
                    m_vals[i] = 0.0;
                    break;
                }
                default:
                    m_vals[i] = Utils.missingValue();
                }
            } catch (Exception e) {
                m_vals[i] = Utils.missingValue();
            }
        } else {
            // set to missing value
            m_vals[i] = Utils.missingValue();
        }
    }

    Instance newInst = new DenseInstance(1.0, m_vals);
    newInst.setDataset(header);
    return newInst;
}
From source file: org.processmining.analysis.clusteranalysis.ClusterDecisionAnalyzer.java
License: Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector(); // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }

    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}
From source file: org.processmining.analysis.decisionmining.DecisionMiningLogTrace.java
License: Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining
 * algorithm. Note that loop semantics are not yet taken into account,
 * therefore each trace is exactly represented by one learning instance.
 *
 * @param dataset
 *            the enclosing data set
 * @param attInfoSize
 *            the number of attributes (including the class attribute)
 *            specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategory branch,
        DecisionMiningLogReader log, DecisionPointContext context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);

        // check whether trace should be further traversed
        // (or whether learning instance should be build from current state)
        if (branch.contains(le)
                && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                        || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }

    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}
From source file: org.processmining.analysis.decisionmining.DecisionMiningLogTraceForAuLdg.java
License: Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining
 * algorithm. Note that loop semantics are not yet taken into account,
 * therefore each trace is exactly represented by one learning instance.
 *
 * @param dataset
 *            the enclosing data set
 * @param attInfoSize
 *            the number of attributes (including the class attribute)
 *            specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategoryForAuLdg branch,
        DecisionMiningLogReaderForAuLdg log, DecisionPointContextForAuLdg context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);

        // check whether trace should be further traversed
        // (or whether learning instance should be build from current state)
        if (branch.contains(le)
                && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                        || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }

    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}
From source file: org.processmining.analysis.traceclustering.profile.AggregateProfile.java
License: Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector(); // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();

    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }
    return data;
}
From source file: org.prom5.analysis.clusteranalysis.ClusterDecisionAnalyzer.java
License: Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector(); // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }

    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}
From source file: org.prom5.analysis.decisionmining.DecisionMiningLogTrace.java
License: Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining algorithm.
 * Note that loop semantics are not yet taken into account, therefore each trace
 * is exactly represented by one learning instance.
 * @param dataset the enclosing data set
 * @param attInfoSize the number of attributes (including the class attribute) specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategory branch,
        DecisionMiningLogReader log, DecisionPointContext context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);

        // check whether trace should be further traversed
        // (or whether learning instance should be build from current state)
        if (branch.contains(le)
                && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                        || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }

    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}
From source file: org.prom5.analysis.traceclustering.profile.AggregateProfile.java
License: Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector(); // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();

    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }
    return data;
}
From source file: org.sd.classifier.WekaClassifier.java
License: Open Source License

public ClassificationResult classify(FeatureVector featureVector, FeatureDictionary featureDictionary) {
    double[] attrValues = featureDictionary.getFeatureVectorAsDouble(featureVector);

    // we are required to put a weight for each instance, so we weight them all as 1.0
    Instance instance = new Instance(1.0, attrValues);
    instance.setDataset(dataSet);

    ClassificationResult classificationResult = null;
    try {
        double[] distribution = classifier.distributionForInstance(instance);
        double value = classifier.classifyInstance(instance);
        int index = distribution.length == 1 ? 0 : MathUtil.toInt(value);
        classificationResult = new WekaClassificationResult(value, distribution[index],
                featureDictionary.getClassificationLabel(value), featureVector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return classificationResult;
}
From source file: org.wikipedia.miner.annotation.Disambiguator.java
License: Open Source License

/**
 * returns the probability (between 0 and 1) of a sense with the given commonness and relatedness being valid
 * given the available context.
 *
 * @param commonness the commonness of the sense (it's prior probability, irrespective of context)
 * @param relatedness the relatedness of the sense to the given context (the result of calling context.getRelatednessTo())
 * @param context the available context.
 * @return the probability that the sense implied here is valid.
 * @throws Exception if we cannot classify this sense.
 */
public double getProbabilityOfSense(double commonness, double relatedness, Context context) throws Exception {
    double[] values = new double[attributes.size()];

    values[0] = commonness;
    values[1] = relatedness;
    values[2] = context.getQuality();
    //values[2] = context.getSize() ;
    //values[3] = context.getTotalRelatedness() ;
    //values[4] = context.getTotalLinkLikelyhood() ;
    values[3] = Instance.missingValue();

    Instance i = new Instance(1.0, values);
    i.setDataset(header);

    return classifier.distributionForInstance(i)[0];
}