Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usage for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Document

Sets the reference to the dataset.
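
As a quick orientation before the full examples, the sketch below shows the basic pattern in self-contained form. It is illustrative only: the relation name, attribute names and values are made up, and it assumes the Weka 3.7+ API in which Instance is an interface implemented by DenseInstance (several examples below use the older 3.6 API, where new Instance(...) created a concrete class). The point it shows is that a freshly built instance cannot resolve attribute types, nominal labels or the class index until setDataset attaches the header.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // build a header: two numeric attributes plus a nominal class attribute
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        attInfo.add(new Attribute("length"));
        attInfo.add(new Attribute("width"));
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("small");
        classValues.add("large");
        attInfo.add(new Attribute("class", classValues));
        Instances header = new Instances("example", attInfo, 0);
        header.setClassIndex(header.numAttributes() - 1);

        // a freshly created instance has no reference to a dataset yet
        Instance inst = new DenseInstance(header.numAttributes());

        // attach the header so attribute indices, nominal labels and the
        // class index can be resolved for this instance
        inst.setDataset(header);

        inst.setValue(header.attribute("length"), 5.1);
        inst.setValue(header.attribute("width"), 3.5);
        inst.setClassValue("large");

        System.out.println(inst); // 5.1,3.5,large
    }
}

Note that setDataset only stores a reference; it does not add the instance to the dataset, which is why several examples below still call data.add(instance0) after setting the dataset.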

Usage

From source file:org.pentaho.di.scoring.WekaScoringData.java

License:Open Source License

/**
 * Helper method that constructs an Instance to input to the Weka model based
 * on incoming Kettle fields and pre-constructed attribute-to-field mapping
 * data.
 *
 * @param inputMeta      a <code>RowMetaInterface</code> value
 * @param inputRow       an <code>Object[]</code> value
 * @param mappingIndexes an <code>int[]</code> value
 * @param model          a <code>WekaScoringModel</code> value
 * @param freshVector    a <code>boolean</code> value; if true, allocate a new values array instead of re-using the cached one
 * @return an <code>Instance</code> value
 */
private Instance constructInstance(RowMetaInterface inputMeta, Object[] inputRow, int[] mappingIndexes,
        WekaScoringModel model, boolean freshVector) {

    Instances header = model.getHeader();

    // Re-use this array (unless told otherwise) to avoid an object creation
    if (m_vals == null || freshVector) {
        m_vals = new double[header.numAttributes()];
    }

    for (int i = 0; i < header.numAttributes(); i++) {

        if (mappingIndexes[i] >= 0) {
            try {
                Object inputVal = inputRow[mappingIndexes[i]];

                Attribute temp = header.attribute(i);
                ValueMetaInterface tempField = inputMeta.getValueMeta(mappingIndexes[i]);
                int fieldType = tempField.getType();

                // Check for missing value (null or empty string)
                if (tempField.isNull(inputVal)) {
                    m_vals[i] = Utils.missingValue();
                    continue;
                }

                switch (temp.type()) {
                case Attribute.NUMERIC: {
                    if (fieldType == ValueMetaInterface.TYPE_BOOLEAN) {
                        Boolean b = tempField.getBoolean(inputVal);
                        if (b.booleanValue()) {
                            m_vals[i] = 1.0;
                        } else {
                            m_vals[i] = 0.0;
                        }
                    } else if (fieldType == ValueMetaInterface.TYPE_INTEGER) {
                        Long t = tempField.getInteger(inputVal);
                        m_vals[i] = t.longValue();
                    } else {
                        Double n = tempField.getNumber(inputVal);
                        m_vals[i] = n.doubleValue();
                    }
                }
                    break;
                case Attribute.NOMINAL: {
                    String s = tempField.getString(inputVal);
                    // now need to look for this value in the attribute
                    // in order to get the correct index
                    int index = temp.indexOfValue(s);
                    if (index < 0) {
                        // set to missing value
                        m_vals[i] = Utils.missingValue();
                    } else {
                        m_vals[i] = index;
                    }
                }
                    break;
                case Attribute.STRING: {
                    String s = tempField.getString(inputVal);
                    // Set the attribute in the header to contain just this string value
                    temp.setStringValue(s);
                    m_vals[i] = 0.0;
                    break;
                }
                default:
                    m_vals[i] = Utils.missingValue();
                }
            } catch (Exception e) {
                m_vals[i] = Utils.missingValue();
            }
        } else {
            // set to missing value
            m_vals[i] = Utils.missingValue();
        }
    }

    Instance newInst = new DenseInstance(1.0, m_vals);
    newInst.setDataset(header);
    return newInst;
}

From source file:org.processmining.analysis.clusteranalysis.ClusterDecisionAnalyzer.java

License:Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection
    // scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}

From source file:org.processmining.analysis.decisionmining.DecisionMiningLogTrace.java

License:Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining
 * algorithm. Note that loop semantics are not yet taken into account,
 * therefore each trace is represented by exactly one learning instance.
 *
 * @param dataset
 *            the enclosing data set
 * @param attInfoSize
 *            the number of attributes (including the class attribute)
 *            specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategory branch,
        DecisionMiningLogReader log, DecisionPointContext context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);
        // check whether trace should be further traversed
        // (or whether learning instance should be built from current state)
        if (branch.contains(le) && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }
    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}

From source file:org.processmining.analysis.decisionmining.DecisionMiningLogTraceForAuLdg.java

License:Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining
 * algorithm. Note that loop semantics are not yet taken into account,
 * therefore each trace is represented by exactly one learning instance.
 *
 * @param dataset
 *          the enclosing data set
 * @param attInfoSize
 *          the number of attributes (including the class attribute) specified
 *          by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategoryForAuLdg branch,
        DecisionMiningLogReaderForAuLdg log, DecisionPointContextForAuLdg context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);
        // check whether trace should be further traversed
        // (or whether learning instance should be built from current state)
        if (branch.contains(le) && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }
    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}

From source file:org.processmining.analysis.traceclustering.profile.AggregateProfile.java

License:Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection
    // scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }

    return data;
}

From source file:org.prom5.analysis.clusteranalysis.ClusterDecisionAnalyzer.java

License:Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}

From source file:org.prom5.analysis.decisionmining.DecisionMiningLogTrace.java

License:Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining algorithm.
 * Note that loop semantics are not yet taken into account, therefore each trace
 * is represented by exactly one learning instance.
 * @param dataset the enclosing data set
 * @param attInfoSize the number of attributes (including the class attribute) specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategory branch,
        DecisionMiningLogReader log, DecisionPointContext context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);
        // check whether trace should be further traversed 
        // (or whether learning instance should be built from current state)
        if (branch.contains(le) && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }
    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}

From source file:org.prom5.analysis.traceclustering.profile.AggregateProfile.java

License:Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }

    return data;
}

From source file:org.sd.classifier.WekaClassifier.java

License:Open Source License

public ClassificationResult classify(FeatureVector featureVector, FeatureDictionary featureDictionary) {
    double[] attrValues = featureDictionary.getFeatureVectorAsDouble(featureVector);

    // a weight is required for each instance, so we weight them all as 1.0
    Instance instance = new Instance(1.0, attrValues);
    instance.setDataset(dataSet);

    ClassificationResult classificationResult = null;

    try {
        double[] distribution = classifier.distributionForInstance(instance);
        double value = classifier.classifyInstance(instance);
        int index = distribution.length == 1 ? 0 : MathUtil.toInt(value);
        classificationResult = new WekaClassificationResult(value, distribution[index],
                featureDictionary.getClassificationLabel(value), featureVector);

    } catch (Exception e) {
        e.printStackTrace();
    }

    return classificationResult;
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

/**
 * Returns the probability (between 0 and 1) of a sense with the given commonness and relatedness being valid
 * given the available context.
 * 
 * @param commonness the commonness of the sense (its prior probability, irrespective of context)
 * @param relatedness the relatedness of the sense to the given context (the result of calling context.getRelatednessTo())
 * @param context the available context.
 * @return the probability that the sense implied here is valid.
 * @throws Exception if we cannot classify this sense.
 */
public double getProbabilityOfSense(double commonness, double relatedness, Context context) throws Exception {

    double[] values = new double[attributes.size()];

    values[0] = commonness;
    values[1] = relatedness;
    values[2] = context.getQuality();
    //values[2] = context.getSize() ;
    //values[3] = context.getTotalRelatedness() ;
    //values[4] = context.getTotalLinkLikelyhood() ;

    values[3] = Instance.missingValue();

    Instance i = new Instance(1.0, values);
    i.setDataset(header);

    return classifier.distributionForInstance(i)[0];
}