Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



public Instance instance(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java

License:Open Source License

/**
 * Pre-processes the training data, runs k-means to place the RBF centers,
 * and fills {@code m_RBFParameters} with the initial parameter vector.
 *
 * Pipeline: shuffle, replace missing values, drop useless attributes,
 * binarize nominals, normalize, then cluster with SimpleKMeans.
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Work on a copy so the caller's data set is left untouched.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    // Remember two distinct class values (y0, y1) BEFORE normalization so the
    // linear map from the normalized class scale back to the original one can
    // be derived below. (Not relevant in the classification case.)
    double y0 = data.instance(0).classValue();
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    // Normalize all attributes, including the class (hence setIgnoreClass(true)).
    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);
    // Same two instances AFTER normalization (z0, z1); solve y = m_x0 + m_x1 * z
    // so predictions can be mapped back to the original class scale.
    double z0 = data.instance(0).classValue(); // Not relevant in the classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means on the data WITHOUT the class attribute to get the RBF centers.
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    data.setClassIndex(-1); // temporarily unset so Remove may drop the class column
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    // k-means may return fewer clusters than requested.
    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up offsets into the flat parameter vector:
    // [output weights | optional attribute weights | centers | scales]
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    // The number of scale parameters depends on the chosen scale option.
    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function.
    // Note: for each center i only centers j > i are scanned, so this is the
    // largest of the "min squared distance to a later center" values.
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize scales (sqrt of the squared distance above) and copy the
    // k-means centroids into the parameter vector.
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        // j indexes attributes in 'data' (class included); k indexes attributes
        // in 'centers' (built from the class-removed data — presumably
        // classIndex() is -1 there, making the k == classIndex() skip a no-op;
        // TODO confirm).
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    // Attribute weights start at 1 (no attribute favored initially).
    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    // Output-layer weights are initialized separately.
    initializeOutputLayer(random);

    return data;
}

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

public static void print_dataset_as_matrix(Instances data) {
    // Dump the data set as a matrix: one instance per line,
    // attribute values separated by single spaces.
    int rows = data.numInstances();
    int cols = data.numAttributes();
    for (int row = 0; row < rows; row++) {
        Instance inst = data.instance(row);
        for (int col = 0; col < cols; col++) {
            System.out.print(inst.value(col) + " ");
        }
        System.out.println();
    }
}

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

public static Instances genGaussianDatasetWithSigmaEvolution(double[][] centers, double[][] sigmas,
        double[][] sigmas2, int pointsPerCluster, long seed, boolean randomize) {
    // Two Gaussian batches around the same centers: the first drawn with
    // sigmas, the second with sigmas2 (under a distinct seed), concatenated
    // into a single data set.
    Instances combined = genGaussianDataset(centers, sigmas, pointsPerCluster, seed, randomize, false);
    Instances secondBatch = genGaussianDataset(centers, sigmas2, pointsPerCluster, seed + 59387, randomize, false);

    int count = secondBatch.numInstances();
    for (int idx = 0; idx < count; idx++) {
        combined.add(secondBatch.instance(idx));
    }

    return combined;
}

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

/**
 * Converts a Weka {@link Instances} data set to a plain double matrix.
 *
 * @param data the data set to convert
 * @return an N x m matrix, N = number of instances, m = number of attributes
 */
public static double[][] convertInstancesToDoubleMatrix(Instances data) {
    int numInstances = data.numInstances();
    int numAttributes = data.numAttributes();
    double[][] matrix = new double[numInstances][numAttributes];

    for (int i = 0; i < numInstances; i++) {
        // Bulk-copy the instance's values into our own row instead of the
        // original element-by-element loop; the result stays independent of
        // the array returned by toDoubleArray().
        System.arraycopy(data.instance(i).toDoubleArray(), 0, matrix[i], 0, numAttributes);
    }

    return matrix;
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java

License:Apache License

/**
 * Builds a Weka {@link Instances} data set from a Pig {@link DataBag} whose
 * tuples each hold N-1 numeric feature values followed by one class label.
 *
 * @param objData a {@code DataBag} of tuples (feature values + class label)
 * @return the assembled data set with the class index set to the last
 *         attribute, or {@code null} if parsing fails
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(Object objData) {

    try {
        DataBag values = (DataBag) objData;
        // Tuple width = number of features + 1 class column.
        int numAttributes = values.iterator().next().size();

        // DataBag already knows its cardinality — no need to count by iterating.
        int bagSize = (int) values.size();

        // Collect every sample's class label.
        String[] inputClass = new String[bagSize];
        int index = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple tuple = it.next();
            inputClass[index] = DataType.toString(tuple.get(numAttributes - 1));
            index = index + 1;
        }

        // Distinct labels become the values of the nominal class attribute.
        HashSet classSet = new HashSet(Arrays.asList(inputClass));
        String[] classValue = (String[]) classSet.toArray(new String[0]);

        FastVector classNames = new FastVector();
        for (int i = 0; i < classValue.length; i++)
            classNames.addElement(classValue[i]);

        // Relation layout: numeric attributes att0..att(N-2) + nominal class.
        FastVector atts = new FastVector();
        for (int i = 0; i < numAttributes - 1; i++)
            atts.addElement(new Attribute("att" + i));
        Instances dataInstance = new Instances("MyRelation", atts, numAttributes);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1);

        // Copy every tuple into the data set. A fresh DenseInstance is created
        // per tuple so we never depend on Instances.add() copying its argument
        // (the original reused one mutable instance for the whole loop).
        index = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple tuple = it.next();
            Instance tmpData = new DenseInstance(numAttributes);
            for (int i = 0; i < numAttributes - 1; i++)
                tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(tuple.get(i)));
            dataInstance.add(tmpData);
            // The nominal class value must be set on the stored row, after add().
            dataInstance.instance(index).setValue(numAttributes - 1,
                    DataType.toString(tuple.get(numAttributes - 1)));
            index = index + 1;
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);

        return dataInstance;
    } catch (Exception e) {
        // Best-effort parser: report and return null rather than propagate.
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java

License:Apache License

/**
 * Builds a Weka {@link Instances} data set from comma-separated text where
 * each non-empty line holds N-1 numeric feature values followed by one
 * class label.
 *
 * @param buff reader positioned at the start of the CSV-style input
 * @return the assembled data set with the class index set to the last
 *         attribute, or {@code null} if parsing fails
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(BufferedReader buff) {

    try {
        int numAttributes = 0; // N features + 1 class, taken from the first row

        List<String> inputClass = new ArrayList<String>();
        List<String[]> dataset = new ArrayList<String[]>();

        // Read the input; the last field of each row is the class label.
        String line;
        while ((line = buff.readLine()) != null) {
            if (!line.isEmpty()) {
                String[] data = line.split(",");
                if (numAttributes == 0)
                    numAttributes = data.length;
                inputClass.add(data[data.length - 1]);
                dataset.add(data);
            }
        }

        // Distinct labels become the values of the nominal class attribute.
        HashSet classSet = new HashSet(inputClass);
        String[] classValue = (String[]) classSet.toArray(new String[0]);

        FastVector classNames = new FastVector();
        for (int i = 0; i < classValue.length; i++)
            classNames.addElement(classValue[i]);

        // Relation layout: numeric attributes att0..att(N-2) + nominal class.
        FastVector atts = new FastVector();
        for (int i = 0; i < numAttributes - 1; i++)
            atts.addElement(new Attribute("att" + i));
        Instances dataInstance = new Instances("MyRelation", atts, numAttributes);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1);

        // Copy each parsed row into the data set. A fresh DenseInstance per row
        // avoids depending on Instances.add() copying its argument, and the
        // original's redundant 'index' counter (always equal to k) is dropped.
        for (int k = 0; k < dataset.size(); k++) {
            String[] row = dataset.get(k);
            Instance tmpData = new DenseInstance(numAttributes);
            for (int i = 0; i < numAttributes - 1; i++)
                tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(row[i]));
            dataInstance.add(tmpData);
            // The nominal class value must be set on the stored row, after add().
            dataInstance.instance(k).setValue(numAttributes - 1, DataType.toString(row[numAttributes - 1]));
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);

        return dataInstance;
    } catch (Exception e) {
        // Best-effort parser: report and return null rather than propagate.
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

/**
 * E-step: returns the weight-normalized log-likelihood of the data and,
 * optionally, refreshes the per-instance cluster membership weights
 * (followed by a prior re-estimation).
 */
private double E(Instances inst, boolean change_weights) throws Exception {

    double logLikelihood = 0.0;
    double sumOfWeights = 0.0;

    for (int idx = 0; idx < inst.numInstances(); idx++) {
        Instance current = inst.instance(idx);
        double w = current.weight();

        logLikelihood += w * logDensityForInstance(current);
        sumOfWeights += w;

        if (change_weights) {
            m_weights[idx] = distributionForInstance(current);
        }
    }

    // Re-estimate the cluster priors from the updated membership weights.
    if (change_weights) {
        estimate_priors(inst);
    }
    return logLikelihood / sumOfWeights;
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

/**
 * Initializes the EM model from the best of 10 SimpleKMeans runs (lowest
 * squared error): centroids seed the means / discrete estimators, cluster
 * spreads seed the standard deviations, and cluster sizes seed the priors.
 */
private void EM_Init(Instances inst) throws Exception {
    int i, j, k;

    // run k means 10 times and choose best solution
    SimpleKMeans bestK = null;
    double bestSqE = Double.MAX_VALUE;
    for (i = 0; i < 10; i++) {
        SimpleKMeans sk = new SimpleKMeans();
        sk.setSeed(m_rr.nextInt());
        sk.setNumClusters(m_num_clusters);
        sk.setDisplayStdDevs(true); // required so getClusterStandardDevs() is populated
        sk.buildClusterer(inst);
        if (sk.getSquaredError() < bestSqE) {
            bestSqE = sk.getSquaredError();
            bestK = sk;
        }
    }

    // initialize with best k-means solution
    m_num_clusters = bestK.numberOfClusters();
    m_weights = new double[inst.numInstances()][m_num_clusters];
    m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs];
    // m_modelNormal[cluster][attribute] = {mean, std dev, weight}
    m_modelNormal = new double[m_num_clusters][m_num_attribs][3];
    m_priors = new double[m_num_clusters];
    Instances centers = bestK.getClusterCentroids();
    Instances stdD = bestK.getClusterStandardDevs();
    double[][][] nominalCounts = bestK.getClusterNominalCounts();
    double[] clusterSizes = bestK.getClusterSizes();

    for (i = 0; i < m_num_clusters; i++) {
        Instance center = centers.instance(i);
        for (j = 0; j < m_num_attribs; j++) {
            if (inst.attribute(j).isNominal()) {
                // Nominal attribute: seed a discrete estimator with the
                // per-cluster value counts from k-means.
                m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues(), true);
                for (k = 0; k < inst.attribute(j).numValues(); k++) {
                    m_model[i][j].addValue(k, nominalCounts[i][j][k]);
                }
            } else {
                // Numeric attribute: mean from the centroid (global mean/mode
                // when the centroid value is missing); std dev from the cluster
                // spread, with fallbacks to keep it positive and finite.
                double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev;
                double mean = (center.isMissing(j)) ? inst.meanOrMode(j) : center.value(j);
                m_modelNormal[i][j][0] = mean;
                // Missing spread: fall back to a fraction of the attribute's range.
                double stdv = (stdD.instance(i).isMissing(j))
                        ? ((m_maxValues[j] - m_minValues[j]) / (2 * m_num_clusters))
                        : stdD.instance(i).value(j);
                if (stdv < minStdD) {
                    // Too small: use the attribute's global std dev, clamped to
                    // the configured minimum (and guarded against infinity).
                    stdv = inst.attributeStats(j).numericStats.stdDev;
                    if (Double.isInfinite(stdv)) {
                        stdv = minStdD;
                    }
                    if (stdv < minStdD) {
                        stdv = minStdD;
                    }
                }
                if (stdv <= 0) {
                    stdv = m_minStdDev;
                }

                m_modelNormal[i][j][1] = stdv;
                m_modelNormal[i][j][2] = 1.0;
            }
        }
    }

    // Priors are proportional to the k-means cluster sizes.
    for (j = 0; j < m_num_clusters; j++) {
        // m_priors[j] += 1.0;
        m_priors[j] = clusterSizes[j];
    }
    Utils.normalize(m_priors);
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

/**
 * Re-estimates the (normalized) cluster priors from the current
 * instance weights and membership weights.
 */
private void estimate_priors(Instances inst) throws Exception {

    // Reset the accumulators.
    for (int cluster = 0; cluster < m_num_clusters; cluster++) {
        m_priors[cluster] = 0.0;
    }

    // Sum weighted memberships per cluster.
    for (int sample = 0; sample < inst.numInstances(); sample++) {
        double w = inst.instance(sample).weight();
        for (int cluster = 0; cluster < m_num_clusters; cluster++) {
            m_priors[cluster] += w * m_weights[sample][cluster];
        }
    }

    Utils.normalize(m_priors);
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

/**
 * M-step: re-estimates the per-cluster distribution parameters from the
 * current membership weights. Nominal attributes update their discrete
 * estimators; numeric attributes accumulate weighted sums in
 * m_modelNormal[i][j] = {sum(w*x) -> mean, sum(w*x^2) -> std dev, sum(w)}.
 */
private void M(Instances inst) throws Exception {

    int i, j, l;

    new_estimators();

    // Accumulation pass: weighted sufficient statistics per cluster/attribute.
    for (i = 0; i < m_num_clusters; i++) {
        for (j = 0; j < m_num_attribs; j++) {
            for (l = 0; l < inst.numInstances(); l++) {
                Instance in = inst.instance(l);
                if (!in.isMissing(j)) {
                    if (inst.attribute(j).isNominal()) {
                        m_model[i][j].addValue(in.value(j), in.weight() * m_weights[l][i]);
                    } else {
                        m_modelNormal[i][j][0] += (in.value(j) * in.weight() * m_weights[l][i]);
                        m_modelNormal[i][j][2] += in.weight() * m_weights[l][i];
                        m_modelNormal[i][j][1] += (in.value(j) * in.value(j) * in.weight() * m_weights[l][i]);
                    }
                }
            }
        }
    }

    // calculate mean and std deviation for numeric attributes
    for (j = 0; j < m_num_attribs; j++) {
        if (!inst.attribute(j).isNominal()) {
            for (i = 0; i < m_num_clusters; i++) {
                if (m_modelNormal[i][j][2] <= 0) {
                    // Degenerate cluster (no weight): flag with a huge std dev.
                    // NOTE(review): the mean is set to m_minStdDev here rather
                    // than 0 (see commented-out line) — confirm this is intended.
                    m_modelNormal[i][j][1] = Double.MAX_VALUE;
                    // m_modelNormal[i][j][0] = 0;
                    m_modelNormal[i][j][0] = m_minStdDev;
                } else {

                    // variance: E[x^2] - E[x]^2, computed from the weighted sums
                    m_modelNormal[i][j][1] = (m_modelNormal[i][j][1]
                            - (m_modelNormal[i][j][0] * m_modelNormal[i][j][0] / m_modelNormal[i][j][2]))
                            / (m_modelNormal[i][j][2]);

                    // clamp tiny negative values caused by rounding
                    if (m_modelNormal[i][j][1] < 0) {
                        m_modelNormal[i][j][1] = 0;
                    }

                    // std dev, with fallbacks to keep it above the configured
                    // minimum and finite
                    double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev;

                    m_modelNormal[i][j][1] = Math.sqrt(m_modelNormal[i][j][1]);

                    if ((m_modelNormal[i][j][1] <= minStdD)) {
                        m_modelNormal[i][j][1] = inst.attributeStats(j).numericStats.stdDev;
                        if ((m_modelNormal[i][j][1] <= minStdD)) {
                            m_modelNormal[i][j][1] = minStdD;
                        }
                    }
                    if ((m_modelNormal[i][j][1] <= 0)) {
                        m_modelNormal[i][j][1] = m_minStdDev;
                    }
                    if (Double.isInfinite(m_modelNormal[i][j][1])) {
                        m_modelNormal[i][j][1] = m_minStdDev;
                    }

                    // mean: normalize the weighted sum last
                    m_modelNormal[i][j][0] /= m_modelNormal[i][j][2];
                }
            }
        }
    }
}