Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usage for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Document

Sets the reference to the dataset.
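
As a quick orientation before the full examples, the sketch below shows the basic pattern in self-contained form. It is illustrative only: the relation name, attribute names and values are made up, and it assumes the Weka 3.7+ API in which Instance is an interface implemented by DenseInstance (several examples below use the older 3.6 API, where new Instance(...) created a concrete class). The point it shows is that a freshly built instance cannot resolve attribute types, nominal labels or the class index until setDataset attaches the header.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // build a header: two numeric attributes plus a nominal class attribute
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        attInfo.add(new Attribute("length"));
        attInfo.add(new Attribute("width"));
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("small");
        classValues.add("large");
        attInfo.add(new Attribute("class", classValues));
        Instances header = new Instances("example", attInfo, 0);
        header.setClassIndex(header.numAttributes() - 1);

        // a freshly created instance has no reference to a dataset yet
        Instance inst = new DenseInstance(header.numAttributes());

        // attach the header so attribute indices, nominal labels and the
        // class index can be resolved for this instance
        inst.setDataset(header);

        inst.setValue(header.attribute("length"), 5.1);
        inst.setValue(header.attribute("width"), 3.5);
        inst.setClassValue("large");

        System.out.println(inst); // 5.1,3.5,large
    }
}

Note that setDataset only stores a reference; it does not add the instance to the dataset, which is why several examples below still call data.add(instance0) after setting the dataset.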

Usage

From source file:org.pentaho.di.scoring.WekaScoringData.java

License:Open Source License

/**
 * Helper method that constructs an Instance to input to the Weka model based
 * on incoming Kettle fields and pre-constructed attribute-to-field mapping
 * data.
 *
 * @param inputMeta      a <code>RowMetaInterface</code> value
 * @param inputRow       an <code>Object[]</code> value
 * @param mappingIndexes an <code>int[]</code> value
 * @param model          a <code>WekaScoringModel</code> value
 * @param freshVector    a <code>boolean</code> value; if true, allocate a new values array instead of re-using the cached one
 * @return an <code>Instance</code> value
 */
private Instance constructInstance(RowMetaInterface inputMeta, Object[] inputRow, int[] mappingIndexes,
        WekaScoringModel model, boolean freshVector) {

    Instances header = model.getHeader();

    // Re-use this array (unless told otherwise) to avoid an object creation
    if (m_vals == null || freshVector) {
        m_vals = new double[header.numAttributes()];
    }

    for (int i = 0; i < header.numAttributes(); i++) {

        if (mappingIndexes[i] >= 0) {
            try {
                Object inputVal = inputRow[mappingIndexes[i]];

                Attribute temp = header.attribute(i);
                ValueMetaInterface tempField = inputMeta.getValueMeta(mappingIndexes[i]);
                int fieldType = tempField.getType();

                // Check for missing value (null or empty string)
                if (tempField.isNull(inputVal)) {
                    m_vals[i] = Utils.missingValue();
                    continue;
                }

                switch (temp.type()) {
                case Attribute.NUMERIC: {
                    if (fieldType == ValueMetaInterface.TYPE_BOOLEAN) {
                        Boolean b = tempField.getBoolean(inputVal);
                        if (b.booleanValue()) {
                            m_vals[i] = 1.0;
                        } else {
                            m_vals[i] = 0.0;
                        }
                    } else if (fieldType == ValueMetaInterface.TYPE_INTEGER) {
                        Long t = tempField.getInteger(inputVal);
                        m_vals[i] = t.longValue();
                    } else {
                        Double n = tempField.getNumber(inputVal);
                        m_vals[i] = n.doubleValue();
                    }
                }
                    break;
                case Attribute.NOMINAL: {
                    String s = tempField.getString(inputVal);
                    // now need to look for this value in the attribute
                    // in order to get the correct index
                    int index = temp.indexOfValue(s);
                    if (index < 0) {
                        // set to missing value
                        m_vals[i] = Utils.missingValue();
                    } else {
                        m_vals[i] = index;
                    }
                }
                    break;
                case Attribute.STRING: {
                    String s = tempField.getString(inputVal);
                    // Set the attribute in the header to contain just this string value
                    temp.setStringValue(s);
                    m_vals[i] = 0.0;
                    break;
                }
                default:
                    m_vals[i] = Utils.missingValue();
                }
            } catch (Exception e) {
                m_vals[i] = Utils.missingValue();
            }
        } else {
            // set to missing value
            m_vals[i] = Utils.missingValue();
        }
    }

    Instance newInst = new DenseInstance(1.0, m_vals);
    newInst.setDataset(header);
    return newInst;
}

From source file:org.processmining.analysis.clusteranalysis.ClusterDecisionAnalyzer.java

License:Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection
    // scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}

From source file:org.processmining.analysis.decisionmining.DecisionMiningLogTrace.java

License:Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining
 * algorithm. Note that loop semantics are not yet taken into account,
 * therefore each trace is represented by exactly one learning instance.
 *
 * @param dataset
 *            the enclosing data set
 * @param attInfoSize
 *            the number of attributes (including the class attribute)
 *            specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategory branch,
        DecisionMiningLogReader log, DecisionPointContext context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);
        // check whether trace should be further traversed
        // (or whether learning instance should be built from current state)
        if (branch.contains(le) && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }
    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}

From source file:org.processmining.analysis.decisionmining.DecisionMiningLogTraceForAuLdg.java

License:Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining
 * algorithm. Note that loop semantics are not yet taken into account,
 * therefore each trace is represented by exactly one learning instance.
 *
 * @param dataset
 *          the enclosing data set
 * @param attInfoSize
 *          the number of attributes (including the class attribute) specified
 *          by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategoryForAuLdg branch,
        DecisionMiningLogReaderForAuLdg log, DecisionPointContextForAuLdg context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);
        // check whether trace should be further traversed
        // (or whether learning instance should be built from current state)
        if (branch.contains(le) && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }
    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}

From source file:org.processmining.analysis.traceclustering.profile.AggregateProfile.java

License:Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection
    // scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }

    return data;
}

From source file:org.prom5.analysis.clusteranalysis.ClusterDecisionAnalyzer.java

License:Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}

From source file:org.prom5.analysis.decisionmining.DecisionMiningLogTrace.java

License:Open Source License

/**
 * Creates a learning instance that can be used by a weka data mining algorithm.
 * Note that loop semantics are not yet taken into account, therefore each trace
 * is represented by exactly one learning instance.
 * @param dataset the enclosing data set
 * @param attInfoSize the number of attributes (including the class attribute) specified by the instance
 * @return the learning instance representing this log trace
 */
public Instance makeInstance(Instances dataset, int attInfoSize, DecisionCategory branch,
        DecisionMiningLogReader log, DecisionPointContext context) {
    Instance instance = new Instance(attInfoSize);

    // provide global attribute values
    Map<String, String> piData = myInstance.getAttributes();
    evaluateDataForInstance(piData, instance, dataset);

    // provide ATE attribute values
    Iterator<AuditTrailEntry> ates = myInstance.getAuditTrailEntryList().iterator();
    while (ates.hasNext()) {
        AuditTrailEntry ate = ates.next();
        Map<String, String> ateData = ate.getAttributes();
        evaluateDataForInstance(ateData, instance, dataset);

        // Which logEvent happened?
        LogEvent le = log.findLogEvent(ate);
        // check whether trace should be further traversed 
        // (or whether learning instance should be built from current state)
        if (branch.contains(le) && ((context.getAttributeSelectionScope() == AttributeSelectionScope.ALL_BEFORE)
                || (context.getAttributeSelectionScope() == AttributeSelectionScope.JUST_BEFORE))) {
            // stop with data attributes up to current state of trace
            break;
        }
    }
    // make enclosing dataset known to this instance
    instance.setDataset(dataset);
    return instance;
}

From source file:org.prom5.analysis.traceclustering.profile.AggregateProfile.java

License:Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }

    return data;
}

From source file:org.sd.classifier.WekaClassifier.java

License:Open Source License

public ClassificationResult classify(FeatureVector featureVector, FeatureDictionary featureDictionary) {
    double[] attrValues = featureDictionary.getFeatureVectorAsDouble(featureVector);

    // a weight is required for each instance, so we weight them all as 1.0
    Instance instance = new Instance(1.0, attrValues);
    instance.setDataset(dataSet);

    ClassificationResult classificationResult = null;

    try {
        double[] distribution = classifier.distributionForInstance(instance);
        double value = classifier.classifyInstance(instance);
        int index = distribution.length == 1 ? 0 : MathUtil.toInt(value);
        classificationResult = new WekaClassificationResult(value, distribution[index],
                featureDictionary.getClassificationLabel(value), featureVector);

    } catch (Exception e) {
        e.printStackTrace();
    }

    return classificationResult;
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

/**
 * Returns the probability (between 0 and 1) of a sense with the given commonness and relatedness being valid
 * given the available context.
 * 
 * @param commonness the commonness of the sense (its prior probability, irrespective of context)
 * @param relatedness the relatedness of the sense to the given context (the result of calling context.getRelatednessTo())
 * @param context the available context.
 * @return the probability that the sense implied here is valid.
 * @throws Exception if we cannot classify this sense.
 */
public double getProbabilityOfSense(double commonness, double relatedness, Context context) throws Exception {

    double[] values = new double[attributes.size()];

    values[0] = commonness;
    values[1] = relatedness;
    values[2] = context.getQuality();
    //values[2] = context.getSize() ;
    //values[3] = context.getTotalRelatedness() ;
    //values[4] = context.getTotalLinkLikelyhood() ;

    values[3] = Instance.missingValue();

    Instance i = new Instance(1.0, values);
    i.setDataset(header);

    return classifier.distributionForInstance(i)[0];
}