Example usage for weka.core Instances attribute

Introduction

On this page you can find example usages of the weka.core Instances attribute method.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:mulan.data.MultiLabelInstances.java

License:Open Source License

/**
 * Validates the data set against the labels meta-data and checks that the two agree.
 * Neither the data set nor the meta-data is modified by this method itself.
 *
 * @param dataSet the data set whose attributes are inspected
 * @param labelsMetaData the meta-data naming the label attributes
 * @throws InvalidDataFormatException if fewer than two labels are defined in the meta-data,
 *             if a label attribute fails {@code checkLabelAttributeFormat}, or if the number
 *             of attributes matching a label name differs from the number of defined labels
 */
private void validate(Instances dataSet, LabelsMetaData labelsMetaData) throws InvalidDataFormatException {
    final Set<String> declaredLabels = labelsMetaData.getLabelNames();
    final int declaredCount = declaredLabels.size();
    if (declaredCount < 2) {
        throw new InvalidDataFormatException(String.format(
                "There must be at least 2 label attributes specified, but only '%s' are defined in metadata",
                declaredCount));
    }

    // Count the attributes that carry a label name, checking the format of each one found.
    int found = 0;
    final int attributeCount = dataSet.numAttributes();
    for (int position = 0; position < attributeCount; position++) {
        final Attribute candidate = dataSet.attribute(position);
        if (!declaredLabels.contains(candidate.name())) {
            continue;
        }
        found++;
        if (!checkLabelAttributeFormat(candidate)) {
            throw new InvalidDataFormatException(
                    String.format("The format of label attribute '%s' is not valid.", candidate.name()));
        }
    }

    if (found != declaredCount) {
        throw new InvalidDataFormatException(
                "Not all labels defined in meta-data are present in ARFF data file.");
    }

    // Hierarchical labels get an additional consistency pass starting at the root labels.
    if (labelsMetaData.isHierarchy()) {
        checkLabelsConsistency(dataSet, labelsMetaData.getRootLabels());
    }
}

From source file:mulan.data.MultiLabelInstances.java

License:Open Source License

/**
 * Runs {@code checkSubtreeConsistency} for every instance of the data set against every
 * root label node.
 *
 * @param dataSet the data set whose instances are checked
 * @param rootLabelNodes the root nodes from which each subtree check starts
 * @throws InvalidDataFormatException propagated from the subtree consistency check
 */
private void checkLabelsConsistency(Instances dataSet, Set<LabelNode> rootLabelNodes)
        throws InvalidDataFormatException {
    // Name-to-attribute lookup table, built once and handed to every subtree check.
    final Map<String, Attribute> attributeByName = new HashMap<String, Attribute>();
    final int attributeCount = dataSet.numAttributes();
    for (int position = 0; position < attributeCount; position++) {
        final Attribute current = dataSet.attribute(position);
        attributeByName.put(current.name(), current);
    }

    final int instanceCount = dataSet.numInstances();
    for (int row = 0; row < instanceCount; row++) {
        final Instance example = dataSet.instance(row);
        for (LabelNode root : rootLabelNodes) {
            checkSubtreeConsistency(root, example, true, attributeByName);
        }
    }
}

From source file:mulan.data.Statistics.java

License:Open Source License

/**
 * Calculates multi-label statistics for the given data set and stores them in the fields of
 * this object: the number of labels, predictors, nominal and numeric features, the label
 * cardinality and density, the per-label example frequency, the distribution of
 * per-example cardinalities and the distinct label sets together with their counts.
 *
 * @param mlData a multi-label dataset
 */
public void calculateStats(MultiLabelInstances mlData) {
    // Reset all statistics.
    final Instances dataSet = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    final int[] labelPositions = mlData.getLabelIndices();
    final int[] featurePositions = mlData.getFeatureIndices();
    numPredictors = featurePositions.length;

    labelCardinality = 0;
    numNominal = 0;
    numNumeric = 0;
    examplesPerLabel = new double[numLabels];
    cardinalityDistribution = new double[numLabels + 1];
    labelsets = new HashMap<LabelSet, Integer>();

    // Count the feature attributes by type.
    for (int featurePosition : featurePositions) {
        if (dataSet.attribute(featurePosition).isNominal()) {
            numNominal++;
        }
        if (dataSet.attribute(featurePosition).isNumeric()) {
            numNumeric++;
        }
    }

    // Walk over the examples; a label counts as set when its string value is "1".
    numInstances = dataSet.numInstances();
    for (int row = 0; row < numInstances; row++) {
        int setLabels = 0;
        final double[] membership = new double[numLabels];
        for (int slot = 0; slot < numLabels; slot++) {
            if (dataSet.instance(row).stringValue(labelPositions[slot]).equals("1")) {
                membership[slot] = 1;
                setLabels++;
                labelCardinality++;
                examplesPerLabel[slot]++;
            } else {
                membership[slot] = 0;
            }
        }
        cardinalityDistribution[setLabels]++;

        // Tally how often this exact combination of labels has been seen.
        final LabelSet combination = new LabelSet(membership);
        final Integer seenSoFar = labelsets.get(combination);
        if (seenSoFar != null) {
            labelsets.put(combination, seenSoFar + 1);
        } else {
            labelsets.put(combination, 1);
        }
    }

    // Turn the accumulated totals into averages over the examples.
    labelCardinality /= numInstances;
    labelDensity = labelCardinality / numLabels;
    for (int slot = 0; slot < numLabels; slot++) {
        examplesPerLabel[slot] /= numInstances;
    }
}

From source file:mulan.transformations.IncludeLabelsTransformation.java

License:Open Source License

/**
 * Builds a data set in which every source instance appears once per label: the label
 * attributes are removed, a nominal "Label" attribute naming the label and a binary "Class"
 * attribute (set as the class) are appended, and "Class" is "1" when the source instance
 * has the value "1" for that label and "0" otherwise.
 *
 * @param mlData multi-label data
 * @return transformed instances
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    final int labelCount = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();

    // Start from the data stripped of all label attributes.
    Instances transformed = RemoveAllLabels.transformInstances(mlData);
    final Instances source = mlData.getDataSet();

    // Append a nominal attribute whose values are the label names.
    final ArrayList<String> labelNames = new ArrayList<String>(labelCount);
    for (int slot = 0; slot < labelCount; slot++) {
        labelNames.add(source.attribute(labelIndices[slot]).name());
    }
    transformed.insertAttributeAt(new Attribute("Label", labelNames), transformed.numAttributes());

    // Append the binary attribute that becomes the class.
    final ArrayList<String> binaryValues = new ArrayList<String>(2);
    binaryValues.add("0");
    binaryValues.add("1");
    transformed.insertAttributeAt(new Attribute("Class", binaryValues), transformed.numAttributes());

    // Keep only the header; the instances are generated below.
    transformed = new Instances(transformed, 0);
    transformed.setClassIndex(transformed.numAttributes() - 1);

    for (int row = 0; row < source.numInstances(); row++) {
        for (int slot = 0; slot < labelCount; slot++) {
            final Instance expanded = RemoveAllLabels.transformInstance(source.instance(row), labelIndices);
            // Detach from any data set while the two extra attribute slots are added.
            expanded.setDataset(null);
            expanded.insertAttributeAt(expanded.numAttributes());
            expanded.insertAttributeAt(expanded.numAttributes());
            expanded.setDataset(transformed);

            final int classPosition = expanded.numAttributes() - 1;
            expanded.setValue(classPosition - 1, labelNames.get(slot));

            final int labelPosition = labelIndices[slot];
            final String labelValue = source.attribute(labelPosition)
                    .value((int) source.instance(row).value(labelPosition));
            expanded.setValue(classPosition, labelValue.equals("1") ? "1" : "0");
            transformed.add(expanded);
        }
    }

    return transformed;
}

From source file:mulan.transformations.LabelPowersetTransformation.java

License:Open Source License

/**
 * Replaces the label attributes of the data set with a single nominal "class" attribute.
 * The values of that attribute are the bit strings of the distinct label sets found in the
 * data; each instance receives the concatenation of its own label values as class value.
 * An empty copy of the result is stored in {@code transformedFormat}.
 *
 * @param mlData multi-label data
 * @return the transformed instances, with the new attribute set as the class
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    Instances data = mlData.getDataSet();
    int numLabels = mlData.getNumLabels();
    int[] labelIndices = mlData.getLabelIndices();

    // gather distinct label combinations
    HashSet<LabelSet> labelSets = new HashSet<LabelSet>();
    int numInstances = data.numInstances();
    for (int i = 0; i < numInstances; i++) {
        // construct labelset from the nominal value of every label attribute
        double[] dblLabels = new double[numLabels];
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index)));
        }

        // the set keeps only one copy of each combination
        labelSets.add(new LabelSet(dblLabels));
    }

    // create class attribute
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    Attribute newClass = new Attribute("class", classValues);

    // remove all labels
    Instances newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values
    for (int i = 0; i < newData.numInstances(); i++) {
        // a builder avoids re-copying the partial string on every appended label value
        StringBuilder strClass = new StringBuilder(numLabels);
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass.append(data.attribute(index).value((int) data.instance(i).value(index)));
        }
        newData.instance(i).setClassValue(strClass.toString());
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}

From source file:mulan.transformations.PT6Transformation.java

License:Open Source License

/**
 * Builds a data set in which every source instance appears once per label: the label
 * attributes are removed, a nominal "Label" attribute naming the label and a binary "Class"
 * attribute (set as the class) are appended, and "Class" is "1" when the source instance
 * has the value "1" for that label and "0" otherwise.
 *
 * @param mlData multi-label data
 * @return transformed instances
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    final int labelCount = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();

    // Begin with the label-free version of the data.
    Instances transformed = RemoveAllLabels.transformInstances(mlData);
    final Instances original = mlData.getDataSet();

    // Nominal attribute listing the label names, appended last.
    final ArrayList<String> labelNames = new ArrayList<String>(labelCount);
    for (int k = 0; k < labelCount; k++) {
        labelNames.add(original.attribute(labelIndices[k]).name());
    }
    final Attribute labelAttribute = new Attribute("Label", labelNames);
    transformed.insertAttributeAt(labelAttribute, transformed.numAttributes());

    // Binary attribute appended after it; it is made the class below.
    final ArrayList<String> zeroOne = new ArrayList<String>(2);
    zeroOne.add("0");
    zeroOne.add("1");
    final Attribute binaryAttribute = new Attribute("Class", zeroOne);
    transformed.insertAttributeAt(binaryAttribute, transformed.numAttributes());

    // Retain the header only and fill it with one generated instance per (instance, label) pair.
    transformed = new Instances(transformed, 0);
    transformed.setClassIndex(transformed.numAttributes() - 1);

    for (int row = 0; row < original.numInstances(); row++) {
        for (int k = 0; k < labelCount; k++) {
            final Instance copy = RemoveAllLabels.transformInstance(original.instance(row), labelIndices);
            // The instance is detached from any data set while it grows by two attribute slots.
            copy.setDataset(null);
            copy.insertAttributeAt(copy.numAttributes());
            copy.insertAttributeAt(copy.numAttributes());
            copy.setDataset(transformed);

            final int last = copy.numAttributes() - 1;
            copy.setValue(last - 1, labelNames.get(k));

            final int labelPosition = labelIndices[k];
            final String nominal = original.attribute(labelPosition)
                    .value((int) original.instance(row).value(labelPosition));
            copy.setValue(last, nominal.equals("1") ? "1" : "0");
            transformed.add(copy);
        }
    }

    return transformed;
}

From source file:mulan.transformations.regression.ChainTransformation.java

License:Open Source License

/**
 * Deletes all target attributes that appear after the first targetsToKeep in the chain. The
 * target attribute at position targetsToKeep in the chain is set as the class attribute.
 * /*from  w ww  . j  ava 2 s  .co m*/
 * @param data the input data set
 * @param chain a chain (permutation) of the indices of the target attributes
 * @param numTargetsToKeep the number of target attributes from the beginning of the chain that
 *            should be kept, 1&lt;=numTargetsToKeep&lt;=numOfTargets
 * @return the transformed Instances object. The input object is not modified.
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public static Instances transformInstances(Instances data, int[] chain, int numTargetsToKeep) throws Exception {
    int numOfTargets = chain.length;
    if (numTargetsToKeep < 1 || numTargetsToKeep > numOfTargets) {
        throw new Exception("keepFirstKTargets should be between 1 and numOfTargets");
    }
    // Indices of attributes to remove
    int[] indicesToRemove = new int[numOfTargets - numTargetsToKeep];
    // the indices of the target attributes whose position in the chain is
    // after the first keepFirstKTargets attributes are marked for removal
    for (int i = 0; i < numOfTargets - numTargetsToKeep; i++) {
        indicesToRemove[i] = chain[numTargetsToKeep + i];
    }

    Remove remove = new Remove();
    remove.setAttributeIndicesArray(indicesToRemove);
    remove.setInputFormat(data);
    // get the class attribute name, the name of the target attribute which is placed in the
    // targetsToKeep position of the chain
    String classAttributeName = data.attribute(chain[numTargetsToKeep - 1]).name();
    Instances transformed = Filter.useFilter(data, remove);
    transformed.setClass(transformed.attribute(classAttributeName));
    return transformed;
}

From source file:myclassifier.MyC45.java

/**
 * Builds the decision tree rooted at this node, recursing into one successor per value of
 * the attribute with the largest gain ratio. The node becomes a leaf when no instances
 * reach it or when that largest gain ratio equals zero.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // No instances reached this node: leave an all-zero distribution and -1 as class value.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = -1;
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Score every attribute offered by the enumeration and pick the one with the largest gain ratio.
    final double[] ratioByAttribute = new double[data.numAttributes()];
    for (Enumeration<?> attributes = data.enumerateAttributes(); attributes.hasMoreElements();) {
        final Attribute candidate = (Attribute) attributes.nextElement();
        ratioByAttribute[candidate.index()] = computeGainRatio(data, candidate);
    }
    m_Attribute = data.attribute(Utils.maxIndex(ratioByAttribute));

    // A non-zero gain ratio means the node is split on the chosen attribute.
    if (!Utils.eq(ratioByAttribute[m_Attribute.index()], 0)) {
        final Instances[] partitions = splitData(data, m_Attribute);
        final int branchCount = m_Attribute.numValues();
        m_Successors = new MyC45[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            m_Successors[branch] = new MyC45();
            m_Successors[branch].makeTree(partitions[branch]);
        }
        return;
    }

    // Otherwise the node is a leaf: record the normalized class distribution and its mode.
    m_Attribute = null;
    m_Distribution = new double[data.numClasses()];
    for (Enumeration<?> instances = data.enumerateInstances(); instances.hasMoreElements();) {
        final Instance example = (Instance) instances.nextElement();
        m_Distribution[(int) example.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Resets the split statistics and evaluates a split of the data on the attribute at
 * {@code attribIndex}. Nominal attributes get one branch per attribute value; any other
 * attribute is treated as numeric with two branches, and the passed data set is sorted by
 * that attribute before it is handled.
 *
 * @param dataSet the data to evaluate the split on
 * @throws Exception declared on the signature; may be thrown by the attribute handlers
 */
public void buildClassifier(Instances dataSet) throws Exception {
    // Reset the state left over from any earlier evaluation.
    numSubsets = 0;
    splitPointValue = Double.MAX_VALUE;
    infoGain = 0;
    gainRatio = 0;

    // Numeric case: a binary split on the data sorted by the attribute.
    if (!dataSet.attribute(attribIndex).isNominal()) {
        numOfBranches = 2;
        numOfSplitPoints = 0;
        dataSet.sort(dataSet.attribute(attribIndex));
        handleNumericAttribute(dataSet);
        return;
    }

    // Nominal case: one branch and one split point per attribute value.
    final int valueCount = dataSet.attribute(attribIndex).numValues();
    numOfBranches = valueCount;
    numOfSplitPoints = valueCount;
    handleNominalAttribute(dataSet);
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Moves the split point to the largest non-missing value of the split attribute in the
 * given data that does not exceed the current split point. Nothing happens unless the
 * attribute is numeric and there is more than one subset. If no value qualifies, the split
 * point becomes {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the instances whose attribute values are scanned
 */
public final void setSplitPoint(Instances allInstances) {

    if (!allInstances.attribute(attribIndex).isNumeric() || !(numSubsets > 1)) {
        return;
    }

    double largestBelow = -Double.MAX_VALUE;
    for (Enumeration<?> cursor = allInstances.enumerateInstances(); cursor.hasMoreElements();) {
        final Instance current = (Instance) cursor.nextElement();
        if (current.isMissing(attribIndex)) {
            continue;
        }
        final double observed = current.value(attribIndex);
        if ((observed > largestBelow) && (observed <= splitPointValue)) {
            largestBelow = observed;
        }
    }
    splitPointValue = largestBelow;
}