Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find an example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:moa.core.VectorDistances.java

License:Apache License

/**
 * Coefficient of Divergence (Legendre and Legendre, 1983)
 * Also known as the Canberra distance/*  w ww . ja va2 s .c o  m*/
 * @param src first data point to compare from
 * @param dst second data point to compare to
 * @param header  feature weight (strictly speaking, all weights should be 1 for pure Minkowski)
 * @return distance
 */
public static synchronized double distanceDivergence(double[] src, double[] dst, Instances header) {
    double ret = 0.0;
    int minSize = Math.min(src.length, Math.min(dst.length, header.numAttributes()));
    if (minSize < 1) {
        return Double.MAX_VALUE;
    }
    for (int i = 0; i < minSize; i++) {
        if (Math.abs(src[i] + dst[i]) <= 0)
            continue;
        double d = Math.abs((src[i] - dst[i]) / (src[i] + dst[i]));
        ret += d * d * header.attribute(i).weight();
    }
    ret = Math.sqrt(ret / minSize);
    // Safety...
    if (Double.isInfinite(ret)) {
        ret = Double.MAX_VALUE;
    } else if (Double.isNaN(ret)) {
        ret = 0.0;
    }
    return ret;
}

From source file:moa.core.VectorDistances.java

License:Apache License

/**
 * Bray-Curtis distance/*from   w w w  .  j  a  va  2 s . c o m*/
 * @param src first data point to compare from
 * @param dst second data point to compare to
 * @param header feature weight and meta-data 
 * @return distance
 */
public static synchronized double distanceBray(double[] src, double[] dst, Instances header) {
    double ret = 0.0;
    int minSize = Math.min(src.length, Math.min(dst.length, header.numAttributes()));
    if (minSize < 1) {
        return Double.MAX_VALUE;
    }
    double numerator = 0;
    double denominator = 0;
    for (int i = 0; i < minSize; i++) {
        numerator += header.attribute(i).weight() * Math.abs(src[i] - dst[i]);
        denominator += header.attribute(i).weight() * Math.abs(src[i] + dst[i]);
    }
    ret += (denominator != 0.0) ? numerator / denominator : Double.MAX_VALUE;
    // Safety...
    if (Double.isInfinite(ret)) {
        ret = Double.MAX_VALUE;
    } else if (Double.isNaN(ret)) {
        ret = 0.0;
    }
    return ret;
}

From source file:moa.core.VectorDistances.java

License:Apache License

/**
 * Chord distance (Orloci, 1967)/*from  w  w w.  java2s. co m*/
 * @param src first data point to compare from
 * @param dst second data point to compare to
 * @param header feature weight (strictly speaking, all weights should be 1 for pure Minkowski)
 * @return distance of the chord joining two normalized points within a hypersphere of radius 1
 */
public static synchronized double distanceChord(double[] src, double[] dst, Instances header) {
    double ret = 0.0;
    int minSize = Math.min(src.length, Math.min(dst.length, header.numAttributes()));
    if (minSize < 1) {
        return Double.MAX_VALUE;
    }

    double srcL2Norm = 0.0;
    double dstL2Norm = 0.0;
    for (int i = 0; i < src.length; i++) {
        srcL2Norm += src[i] * src[i];
    }
    srcL2Norm = Math.sqrt(srcL2Norm);
    for (int i = 0; i < dst.length; i++) {
        dstL2Norm += dst[i] * dst[i];
    }
    dstL2Norm = Math.sqrt(dstL2Norm);

    for (int i = 0; i < minSize; i++) {
        ret += src[i] * dst[i] * header.attribute(i).weight();
    }
    ret = Math.abs(2.0 - 2 * (ret / (srcL2Norm * dstL2Norm)));
    ret = Math.sqrt(ret);

    // Safety...
    if (Double.isInfinite(ret)) {
        ret = Double.MAX_VALUE;
    } else if (Double.isNaN(ret)) {
        ret = 0.0;
    }
    return ret;
}

From source file:moa.streams.ConceptDriftRealStream.java

License:Open Source License

@Override
public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {

    this.inputStream = (InstanceStream) getPreparedClassOption(this.streamOption);
    this.driftStream = (InstanceStream) getPreparedClassOption(this.driftstreamOption);
    this.random = new Random(this.randomSeedOption.getValue());
    numberInstanceStream = 0;
    // A non-zero change angle (alpha, in degrees) overrides the drift
    // width: width = 1 / tan(alpha).
    if (this.alphaOption.getValue() != 0.0) {
        double alphaRadians = this.alphaOption.getValue() * Math.PI / 180;
        this.widthOption.setValue((int) (1 / Math.tan(alphaRadians)));
    }

    // Build the merged header: all feature attributes of both streams
    // (each stream's last attribute is skipped) plus a single class attribute.
    // NOTE(review): assumes the class attribute is the last one in each
    // stream's header — confirm against the stream generators.
    Instances firstHeader = this.inputStream.getHeader();
    Instances secondHeader = this.driftStream.getHeader();
    FastVector mergedAttributes = new FastVector();
    for (int idx = 0; idx < firstHeader.numAttributes() - 1; idx++) {
        mergedAttributes.addElement(firstHeader.attribute(idx));
    }
    for (int idx = 0; idx < secondHeader.numAttributes() - 1; idx++) {
        mergedAttributes.addElement(secondHeader.attribute(idx));
    }

    // Keep whichever class attribute has the larger label set.
    Attribute classLabels = (firstHeader.numClasses() < secondHeader.numClasses())
            ? secondHeader.classAttribute()
            : firstHeader.classAttribute();
    mergedAttributes.addElement(classLabels);

    this.streamHeader = new InstancesHeader(
            new Instances(getCLICreationString(InstanceStream.class), mergedAttributes, 0));
    this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
    restart();

}

From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java

/**
 * Copies the class attribute to another position (first position)
 *
 * @param instances/*from  w  w w.  jav a 2s .  co  m*/
 * @param newName
 * @param newAttributeIndex
 * @return
 * @throws Exception
 */
public static Instances copyClassAttribute(Instances instances, String newName, int newAttributeIndex)
        throws Exception {

    int whichAttribute = instances.classIndex();

    Add filter = new Add();
    filter.setAttributeIndex("" + newAttributeIndex);
    filter.setAttributeName(newName);

    // Copy nominal Attribute
    if (instances.attribute(whichAttribute).isNominal()) {
        String newNominalLabels = "";
        Boolean first = true;
        Enumeration<Object> o = instances.attribute(whichAttribute).enumerateValues();
        while (o.hasMoreElements()) {
            String s = (String) o.nextElement();
            if (!first) {
                newNominalLabels += ",";
            }
            newNominalLabels += s;
            first = false;
        }
        filter.setNominalLabels(newNominalLabels);
    }

    filter.setInputFormat(instances);
    instances = Filter.useFilter(instances, filter);
    return instances;
}

From source file:model.clasification.ModelFinder.java

/**
 * Writes the quoted names of all non-class attributes of the current
 * dataset to methods/classification/TxtFiles/&lt;modelName&gt;.txt as a
 * single comma-separated line.
 *
 * @param modelName base name of the output text file
 * @throws IOException if the file cannot be created
 */
public static void writeIntoTxt(String modelName) throws IOException {
    Instances data = Data.getInstance().getInstances();

    // try-with-resources guarantees the writer is closed even if a write
    // fails (the original leaked the stream on exception).
    try (PrintWriter writer = new PrintWriter(
            new BufferedWriter(new FileWriter("methods/classification/TxtFiles/" + modelName + ".txt")))) {

        int attributeCount = data.numAttributes() - 1; // last attribute is the class

        for (int i = 0; i < attributeCount; i++) {
            // Hoist the attribute description; the original rebuilt it
            // three times per iteration.
            String description = data.attribute(i).toString();
            // NOTE(review): assumes the attribute name is wrapped in single
            // quotes inside the description — confirm; an unquoted name
            // makes both indexOf calls return -1 and substring throws
            // (same as the original behavior).
            int firstQuote = description.indexOf("'");
            int lastQuote = description.lastIndexOf("'");

            writer.write(description.substring(firstQuote + 1, lastQuote) + ", ");
        }
    }
}

From source file:mulan.classifier.meta.ConstrainedKMeans.java

License:Open Source License

/**
 * Generates the clusterer: a constrained k-means with bucketed cluster
 * assignments. Has to initialize all fields of the clusterer that are not
 * being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    // Reset the per-cluster assignment buckets.
    for (int i = 0; i < m_NumClusters; i++) {
        bucket[i] = new ArrayList<bucketInstance>();
    }
    // Each bucket holds at most ceil(n / k) instances — the "constrained"
    // part of this k-means variant.
    bucketSize = (int) Math.ceil(data.numInstances() / (double) m_NumClusters);

    // Can the clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    // Work on a copy with missing values replaced and no class attribute.
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    m_ReplaceMissingFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_ReplaceMissingFilter);

    // Per-attribute min/max, lazily filled by updateMinMax (NaN = unset).
    m_Min = new double[instances.numAttributes()];
    m_Max = new double[instances.numAttributes()];
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_Min[i] = m_Max[i] = Double.NaN;
    }
    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    for (int i = 0; i < instances.numInstances(); i++) {
        updateMinMax(instances.instance(i));
    }

    // Pick initial centroids by a random shuffle, skipping duplicate
    // instances (DecisionTableHashKey detects value-identical rows).
    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    for (int j = instances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(instances.instance(instIndex), instances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(instances.instance(instIndex));
            initC.put(hk, null);
        }
        instances.swap(j, instIndex);
        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // Fewer distinct instances than requested clusters shrinks k.
    m_NumClusters = m_ClusterCentroids.numInstances();
    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    while (!converged) {
        // reset buckets
        for (int j = 0; j < m_NumClusters; j++) {
            bucket[j] = new ArrayList<bucketInstance>();
        }
        emptyClusterCount = 0;
        m_Iterations++;
        // Assignment step: converged once no instance changes cluster.
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }
        // Hard cap on iterations.
        if (m_Iterations > maxIterations) {
            converged = true;
        }
        // Update step: recompute each centroid as the mean/mode of its
        // member instances.
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            double[] vals = new double[instances.numAttributes()];
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                for (int j = 0; j < instances.numAttributes(); j++) {
                    vals[j] = tempI[i].meanOrMode(j);
                    m_ClusterNominalCounts[i][j] = tempI[i].attributeStats(j).nominalCounts;
                }
                m_ClusterCentroids.add(new DenseInstance(1.0, vals));
            }
        }

        // Drop empty clusters for the next iteration.
        // NOTE(review): this discards tempI's contents; if convergence was
        // reached in this same iteration, the std-dev loop below reads the
        // freshly-nulled tempI entries and would NPE — confirm this path
        // cannot occur in practice.
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            tempI = new Instances[m_NumClusters];
        }
        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }
    // reset buckets
    for (int j = 0; j < m_NumClusters; j++) {
        bucket[j] = new ArrayList<bucketInstance>();
    }
    // Final statistics: per-cluster standard deviations (numeric
    // attributes only) and cluster sizes.
    m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        double[] vals2 = new double[instances.numAttributes()];
        for (int j = 0; j < instances.numAttributes(); j++) {
            if (instances.attribute(j).isNumeric()) {
                vals2[j] = Math.sqrt(tempI[i].variance(j));
            } else {
                vals2[j] = Utils.missingValue();
            }
        }
        m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}

From source file:mulan.classifier.meta.HierarchyBuilder.java

License:Open Source License

/**
 * Creates the hierarchical dataset according to the original multilabel
 * instances object and the constructed label hierarchy.
 *
 * @param mlData the original multilabel instances
 * @param metaData the metadata of the constructed label hierarchy
 * @return the produced dataset
 * @throws InvalidDataFormatException if the assembled dataset does not
 *         form valid multi-label instances
 */
public static MultiLabelInstances createHierarchicalDataset(MultiLabelInstances mlData, LabelsMetaData metaData)
        throws InvalidDataFormatException {
    // Meta-labels = labels in the hierarchy that are not leaf labels of
    // the original data.
    // NOTE(review): this mutates the set returned by
    // metaData.getLabelNames() — confirm that method returns a copy rather
    // than the live internal set.
    Set<String> leafLabels = mlData.getLabelsMetaData().getLabelNames();
    Set<String> metaLabels = metaData.getLabelNames();
    for (String string : leafLabels) {
        metaLabels.remove(string);
    }
    Instances dataSet = mlData.getDataSet();
    int numMetaLabels = metaLabels.size();

    // copy existing attributes
    ArrayList<Attribute> atts = new ArrayList<Attribute>(dataSet.numAttributes() + numMetaLabels);
    for (int i = 0; i < dataSet.numAttributes(); i++) {
        atts.add(dataSet.attribute(i));
    }

    // Binary nominal values shared by every meta-label attribute.
    ArrayList<String> labelValues = new ArrayList<String>();
    labelValues.add("0");
    labelValues.add("1");

    // add metalabel attributes
    for (String metaLabel : metaLabels) {
        atts.add(new Attribute(metaLabel, labelValues));
    }

    // initialize dataset
    Instances newDataSet = new Instances("hierarchical", atts, dataSet.numInstances());

    // copy features and labels, set metalabels
    for (int i = 0; i < dataSet.numInstances(); i++) {
        // New values: original attributes first, meta-labels zeroed.
        double[] newValues = new double[newDataSet.numAttributes()];
        Arrays.fill(newValues, 0);

        // copy features and labels
        double[] values = dataSet.instance(i).toDoubleArray();
        System.arraycopy(values, 0, newValues, 0, values.length);

        // For every leaf label set on this instance, switch on all of its
        // ancestor meta-labels.
        for (String label : leafLabels) {
            Attribute att = dataSet.attribute(label);
            if (att.value((int) dataSet.instance(i).value(att)).equals("1")) {
                LabelNode currentNode = metaData.getLabelNode(label);
                // put 1 all the way up to the root, unless you see a 1, in
                // which case stop (that branch was already filled by a
                // previously processed leaf).
                while (currentNode.hasParent()) {
                    currentNode = currentNode.getParent();
                    Attribute currentAtt = newDataSet.attribute(currentNode.getName());
                    if (newValues[atts.indexOf(currentAtt)] == 1) // no need to go more up
                    {
                        break;
                    } else // put 1
                    {
                        newValues[atts.indexOf(currentAtt)] = 1;
                    }
                }
            }
        }
        Instance instance = dataSet.instance(i);
        newDataSet.add(DataUtils.createInstance(instance, instance.weight(), newValues));
    }
    return new MultiLabelInstances(newDataSet, metaData);
}

From source file:mulan.classifier.meta.HMC.java

License:Open Source License

/**
 * Recursively builds the hierarchy of multi-label models: trains the model
 * for {@code node} on data restricted to its direct child labels, then
 * recurses into each non-leaf child with the instances where that child's
 * label is set.
 *
 * @param node the hierarchy node whose model is to be built
 * @param data the instances available at this node (still carrying all
 *             descendant label attributes)
 * @throws InvalidDataFormatException if the reduced data is not valid
 *         multi-label data
 * @throws Exception if filtering or model building fails
 */
private void buildRec(HMCNode node, Instances data) throws InvalidDataFormatException, Exception {
    String metaLabel = node.getName();

    // Determine this node's direct children and the set of labels still
    // present in the data handed down to this node.
    Set<String> childrenLabels = new HashSet<String>();
    Set<String> currentlyAvailableLabels = new HashSet<String>();
    if (metaLabel.equals("root")) {
        for (LabelNode child : originalMetaData.getRootLabels()) {
            childrenLabels.add(child.getName());
        }
        currentlyAvailableLabels = originalMetaData.getLabelNames();
    } else {
        LabelNode labelNode = originalMetaData.getLabelNode(metaLabel);
        for (LabelNode child : labelNode.getChildren()) {
            childrenLabels.add(child.getName());
        }
        currentlyAvailableLabels = labelNode.getDescendantLabels();
    }

    // delete non-children labels: keep only the direct children as labels
    // for this node's model.
    Set<String> labelsToDelete = new HashSet(currentlyAvailableLabels);
    labelsToDelete.removeAll(childrenLabels);
    int[] indicesToDelete = new int[labelsToDelete.size()];
    int counter1 = 0;
    for (String label : labelsToDelete) {
        indicesToDelete[counter1] = data.attribute(label).index();
        counter1++;
    }

    Remove filter1 = new Remove();
    filter1.setAttributeIndicesArray(indicesToDelete);
    filter1.setInputFormat(data);
    Instances nodeInstances = Filter.useFilter(data, filter1);

    // create meta data: each direct child becomes a root label of the
    // node-local (flat) hierarchy.
    LabelsMetaDataImpl nodeMetaData = new LabelsMetaDataImpl();
    for (String label : childrenLabels) {
        nodeMetaData.addRootNode(new LabelNodeImpl(label));
    }

    // create multi-label instance and train this node's model
    MultiLabelInstances nodeData = new MultiLabelInstances(nodeInstances, nodeMetaData);
    node.build(nodeData);
    System.out.println("spark #instances:" + nodeInstances.numInstances());
    // Bookkeeping for reporting: total training instances used and number
    // of nodes built.
    TotalUsedTrainInsts += nodeInstances.numInstances();
    NoNodes++;
    // Recurse into every non-leaf child.
    for (String childLabel : childrenLabels) {
        LabelNode childNode = originalMetaData.getLabelNode(childLabel);
        if (!childNode.hasChildren()) {
            continue;
        }

        // remove instances where child is 0: the child model only trains
        // on instances that actually carry the child label.
        int childMetaLabelIndex = data.attribute(childLabel).index();
        Instances childData = new Instances(data);
        for (int i = 0; i < childData.numInstances(); i++) {
            if (childData.instance(i).stringValue(childMetaLabelIndex).equals("0")) {
                childData.delete(i);
                // While deleting an instance from the trainSet, i must be
                // reduced too so the shifted-down instance is not skipped.
                i--;
            }
        }

        // delete non-descendant labels: the child only needs its own
        // descendant labels.
        Set<String> descendantLabels = childNode.getDescendantLabels();
        Set<String> labelsToDelete2 = new HashSet(currentlyAvailableLabels);
        labelsToDelete2.removeAll(descendantLabels);
        int[] indicesToDelete2 = new int[labelsToDelete2.size()];
        int counter2 = 0;
        for (String label : labelsToDelete2) {
            indicesToDelete2[counter2] = childData.attribute(label).index();
            counter2++;
        }

        Remove filter2 = new Remove();
        filter2.setAttributeIndicesArray(indicesToDelete2);
        filter2.setInputFormat(childData);
        childData = Filter.useFilter(childData, filter2);

        MultiLabelLearner mll = baseLearner.makeCopy();
        HMCNode child = new HMCNode(childLabel, mll);
        node.addChild(child);

        buildRec(child, childData);
    }

}

From source file:mulan.classifier.neural.DataPair.java

License:Open Source License

/**
 * Creates a {@link DataPair} representation for each {@link Instance} contained in
 * a {@link MultiLabelInstances} data set. The {@link DataPair} is a light weight
 * representation of instance values (by double values), which is useful when
 * iterating over the data and its values.
 *
 * @param mlDataSet the {@link MultiLabelInstances} whose content has to be
 *          converted to a list of {@link DataPair}; instances with missing
 *          labels are skipped
 * @param bipolarOutput indicates whether output values should be converted
 *          to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {

    Instances data = mlDataSet.getDataSet();
    int[] featureIndices = mlDataSet.getFeatureIndices();
    int[] labelIndices = mlDataSet.getLabelIndices();
    int numFeatures = featureIndices.length;
    int numLabels = mlDataSet.getNumLabels();

    int numInstances = data.numInstances();
    List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances);
    for (int index = 0; index < numInstances; index++) {
        Instance instance = data.instance(index);

        // Skip instances with missing labels up front; the original
        // computed the whole input vector before discarding them.
        if (mlDataSet.hasMissingLabels(instance)) {
            continue;
        }

        double[] input = new double[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            int featureIndex = featureIndices[i];
            Attribute featureAttr = instance.attribute(featureIndex);
            // If attribute is binary nominal, parse the string value ...
            // it is expected to be '0' or '1'.
            if (featureAttr.isNominal() && featureAttr.numValues() == 2) {
                input[i] = Double.parseDouble(instance.stringValue(featureIndex));
            } // else :
              // a) the attribute is nominal with multiple values, use indexes as nominal values
              //    do not have to be numbers in general ... this is fall-back ... should be rare case
              // b) is numeric attribute
            else {
                input[i] = instance.value(featureIndex);
            }
        }

        // Labels are binary nominal ('0'/'1'); parse the string value,
        // optionally remapping 0 to -1 for bipolar networks.
        double[] output = new double[numLabels];
        for (int i = 0; i < numLabels; i++) {
            output[i] = Double
                    .parseDouble(data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i])));
            if (bipolarOutput && output[i] == 0) {
                output[i] = -1;
            }
        }

        dataPairs.add(new DataPair(input, output));
    }

    return dataPairs;
}