Example usage for weka.core Instances set

List of usage examples for weka.core Instances set

Introduction

In this page you can find the example usage for weka.core Instances set.

Prototype



@Override
public Instance set(int index, Instance instance) 

Source Link

Document

Replaces the instance at the given position.

Usage

From source file:adams.flow.transformer.WekaInstancesMerge.java

License:Open Source License

/**
 * Merges the datasets based on the collected IDs.
 *
 * @param orig   the original datasets/*  w  w  w. ja  v a2s.  c  om*/
 * @param inst   the processed datasets to merge into one
 * @param ids      the IDs for identifying the rows
 * @return      the merged dataset
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
    Instances result;
    ArrayList<Attribute> atts;
    int i;
    int n;
    int m;
    int index;
    String relation;
    List sortedIDs;
    Attribute att;
    int[] indexStart;
    double value;
    double[] values;
    HashMap<Integer, Integer> hashmap;
    HashSet<Instance> hs;

    // create header
    if (isLoggingEnabled())
        getLogger().info("Creating merged header...");
    atts = new ArrayList<>();
    relation = "";
    indexStart = new int[inst.length];
    for (i = 0; i < inst.length; i++) {
        indexStart[i] = atts.size();
        for (n = 0; n < inst[i].numAttributes(); n++)
            atts.add((Attribute) inst[i].attribute(n).copy());
        // assemble relation name
        if (i > 0)
            relation += "_";
        relation += inst[i].relationName();
    }
    result = new Instances(relation, atts, ids.size());

    // fill with missing values
    if (isLoggingEnabled())
        getLogger().info("Filling with missing values...");
    for (i = 0; i < ids.size(); i++) {
        if (isStopped())
            return null;
        // progress
        if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
            getLogger().info("" + (i + 1));
        result.add(new DenseInstance(result.numAttributes()));
    }

    // sort IDs
    if (isLoggingEnabled())
        getLogger().info("Sorting indices...");
    sortedIDs = new ArrayList(ids);
    Collections.sort(sortedIDs);

    // generate rows
    hashmap = new HashMap<>();
    for (i = 0; i < inst.length; i++) {
        if (isStopped())
            return null;
        if (isLoggingEnabled())
            getLogger().info("Adding file #" + (i + 1));
        att = orig[i].attribute(m_UniqueID);
        for (n = 0; n < inst[i].numInstances(); n++) {
            // progress
            if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
                getLogger().info("" + (n + 1));

            // determine index of row
            if (m_AttType == Attribute.NUMERIC)
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
            else
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
            if (index < 0)
                throw new IllegalStateException(
                        "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");

            if (!hashmap.containsKey(index))
                hashmap.put(index, 0);
            hashmap.put(index, hashmap.get(index) + 1);

            // use internal representation for faster access
            values = result.instance(index).toDoubleArray();

            // add attribute values
            for (m = 0; m < inst[i].numAttributes(); m++) {
                // missing value?
                if (inst[i].instance(n).isMissing(m))
                    continue;

                switch (inst[i].attribute(m).type()) {
                case Attribute.NUMERIC:
                case Attribute.DATE:
                case Attribute.NOMINAL:
                    values[indexStart[i] + m] = inst[i].instance(n).value(m);
                    break;

                case Attribute.STRING:
                    value = result.attribute(indexStart[i] + m)
                            .addStringValue(inst[i].instance(n).stringValue(m));
                    values[indexStart[i] + m] = value;
                    break;

                case Attribute.RELATIONAL:
                    value = result.attribute(indexStart[i] + m)
                            .addRelation(inst[i].instance(n).relationalValue(m));
                    values[indexStart[i] + m] = value;
                    break;

                default:
                    throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type());
                }
            }

            // update row
            result.set(index, new DenseInstance(1.0, values));
        }
    }

    if (getRemove()) {
        hs = new HashSet<>();
        for (Integer x : hashmap.keySet()) {
            if (hashmap.get(x) != inst.length)
                hs.add(result.get(x));
        }
        result.removeAll(hs);
    }

    return result;
}

From source file:ffnn.MultilayerPerceptron.java

License:Open Source License

/**
 * This function sets what the m_numeric flag to represent the passed class it
 * also performs the normalization of the attributes if applicable and sets up
 * the info to normalize the class. (note that regardless of the options it
 * will fill an array with the range and base, set to normalize all attributes
 * and the class to be between -1 and 1)
 * /*  ww w  . ja va2s. com*/
 * @param inst the instances.
 * @return The modified instances. This needs to be done. If the attributes
 *         are normalized then deep copies will be made of all the instances
 *         which will need to be passed back out.
 */
private Instances setClassType(Instances inst) throws Exception {
    if (inst != null) {
        // x bounds
        m_attributeRanges = new double[inst.numAttributes()];
        m_attributeBases = new double[inst.numAttributes()];
        for (int noa = 0; noa < inst.numAttributes(); noa++) {
            double min = Double.POSITIVE_INFINITY;
            double max = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < inst.numInstances(); i++) {
                if (!inst.instance(i).isMissing(noa)) {
                    double value = inst.instance(i).value(noa);
                    if (value < min) {
                        min = value;
                    }
                    if (value > max) {
                        max = value;
                    }
                }
            }
            m_attributeRanges[noa] = (max - min) / 2;
            m_attributeBases[noa] = (max + min) / 2;
        }

        if (m_normalizeAttributes) {
            for (int i = 0; i < inst.numInstances(); i++) {
                Instance currentInstance = inst.instance(i);
                double[] instance = new double[inst.numAttributes()];
                for (int noa = 0; noa < inst.numAttributes(); noa++) {
                    if (noa != inst.classIndex()) {
                        if (m_attributeRanges[noa] != 0) {
                            instance[noa] = (currentInstance.value(noa) - m_attributeBases[noa])
                                    / m_attributeRanges[noa];
                        } else {
                            instance[noa] = currentInstance.value(noa) - m_attributeBases[noa];
                        }
                    } else {
                        instance[noa] = currentInstance.value(noa);
                    }
                }
                inst.set(i, new DenseInstance(currentInstance.weight(), instance));
            }
        }

        if (inst.classAttribute().isNumeric()) {
            m_numeric = true;
        } else {
            m_numeric = false;
        }
    }
    return inst;
}