Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:assign00.ExperimentShell.java

/**
 * Loads the data set referenced by {@code file}, splits it 70/30 into training and
 * test sets, standardizes both, trains a {@code NeuralNetworkClassifier} on the
 * training part and prints an evaluation summary for the test part.
 *
 * @param args the command line arguments (not read by this method)
 * @throws Exception if the data cannot be loaded, filtered, or evaluated
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    // The last attribute is the class; shuffle with a fixed seed so the split is reproducible.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    // 70% of the instances go to training, the remainder to testing.
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;

    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);

    // The filter derives its mean/standard deviation from the FIRST batch pushed
    // through it, so the training data must be filtered before the test data.
    // Filtering the test set first would fit the statistics on the test split.
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    NeuralNetworkClassifier NWC = new NeuralNetworkClassifier();
    NWC.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(NWC, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:aw_cluster.myKMeans.java

/**
 * Builds the clusterer from the given data.
 *
 * Works on a copy of {@code data} with the class index unset. Initial centroids are
 * drawn at random (seeded by {@code getSeedRandom()}) without replacement; afterwards
 * instances are re-assigned and centroids recomputed until no assignment changes or
 * {@code maxIteration} iterations have been counted.
 *
 * @param data the instances to cluster
 * @throws Exception if the capability check fails or a helper reports an error
 * @throws RuntimeException if the data set contains no instances
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    Instances instances = new Instances(data);
    instances.setClassIndex(-1);

    if (instances.numInstances() == 0) {
        throw new RuntimeException("Dataset should not be empty");
    }

    assignments = new int[instances.numInstances()];
    centroid = new Instances(instances, numCluster);
    distanceFunction.setInstances(instances);
    squaredError = new double[numCluster];

    // Seeded random choice of the starting centroids. Each pick is swapped to the
    // end of the shrinking range so it cannot be selected twice.
    Random random = new Random(getSeedRandom());
    Instances pool = new Instances(instances);
    for (int last = pool.numInstances() - 1; last >= 0 && centroid.numInstances() < numCluster; last--) {
        int picked = random.nextInt(last + 1);
        centroid.add(pool.instance(picked));
        pool.swap(last, picked);
    }
    pool = null;

    boolean converged;
    do {
        converged = true;
        numIteration++;

        // Assignment step: any changed assignment means another pass is needed.
        for (int idx = 0; idx < instances.numInstances(); idx++) {
            Instance current = instances.instance(idx);
            int assigned = clusterInstanceProcess(current, true);
            if (assigned != assignments[idx]) {
                converged = false;
            }
            assignments[idx] = assigned;
        }

        // Update step: group the instances per cluster and rebuild the centroid set.
        Instances[] members = new Instances[numCluster];
        centroid = new Instances(instances, numCluster);
        for (int c = 0; c < members.length; c++) {
            members[c] = new Instances(instances, 0);
        }
        for (int idx = 0; idx < instances.numInstances(); idx++) {
            members[assignments[idx]].add(instances.instance(idx));
        }
        for (Instances group : members) {
            moveCentroid(group);
        }

        if (converged) {
            squaredError = new double[numCluster];
        }
        // Hitting the iteration limit also ends the loop.
        if (numIteration == maxIteration) {
            converged = true;
        }

        sizeEachCluster = new int[numCluster];
        for (int c = 0; c < numCluster; c++) {
            sizeEachCluster[c] = members[c].numInstances();
        }
    } while (!converged);
}

From source file:binarytreesom.clustering.BinaryTreeSOMClustering.java

/**
 * Initialize the tree configuration. This implementation considers a complete binary tree of depth h.
 *
 * Reads the ARFF file named by {@code getFilenameARFF()}, records the number of
 * instances and attributes, copies every instance into {@code data}, and seeds each
 * neuron's weight vector with a randomly chosen instance.
 *
 * @throws IOException declared for the ARFF read performed by {@code readArff}
 */
private void initialize() throws IOException {
    // Original author's note: the number of nodes N depends on h (h = log N, i.e. N = 2^h).
    Instances dataset = readArff(getFilenameARFF());
    dataset.setClassIndex(-1); // clustering: no class attribute

    numberOfInstances = dataset.numInstances();
    dimensionality = dataset.numAttributes();
    data = new double[getNumberOfInstances()][getDimensionality()];
    weight = new double[getNumberOfNeurons()][getDimensionality()];

    // Each neuron starts from the values of a randomly selected instance.
    int neuron = 0;
    while (neuron < getNumberOfNeurons()) {
        int pick = r.nextInt(getNumberOfInstances());
        weight[neuron] = dataset.instance(pick).toDoubleArray(); // copy of the instance values
        neuron++;
    }

    int row = 0;
    while (row < getNumberOfInstances()) {
        data[row] = dataset.instance(row).toDoubleArray(); // copy of the instance values
        row++;
    }
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Evaluates the classifiers on a given set of instances. Note that the data
 * must have exactly the same format (e.g. order of attributes) as the data
 * used to train the classifiers! Otherwise the results will generally be
 * meaningless.
 *
 * Every prediction is also written to standard output, back to back without a
 * separator.
 *
 * @param classifier the list of classifiers handed to the per-instance evaluation
 * @param classifierWeight the list of weights handed to the per-instance evaluation
 * @param data set of test instances for evaluation
 * @param forPredictionsPrinting varargs parameter that, if supplied, is
 *          expected to hold a StringBuffer, a Range and a Boolean, in that
 *          order; the values are only type-checked here and not used further
 * @return one prediction per instance of {@code data}, in instance order
 * @throws Exception if model could not be evaluated successfully
 */
public double[] evaluateModel(List<LibSVM> classifier, List<Double> classifierWeight, Instances data,
        Object... forPredictionsPrinting) throws Exception {
    double[] predictions = new double[data.numInstances()];

    if (forPredictionsPrinting.length > 0) {
        // The three casts mirror the expected varargs layout. A wrong type or a
        // short array fails here, before any instance is evaluated.
        StringBuffer outputBuffer = (StringBuffer) forPredictionsPrinting[0];
        Range outputAttributes = (Range) forPredictionsPrinting[1];
        boolean outputDistribution = (Boolean) forPredictionsPrinting[2];
    }

    int idx = 0;
    while (idx < data.numInstances()) {
        predictions[idx] = evaluateModelOnceAndRecordPrediction(classifier, classifierWeight,
                data.instance(idx));
        idx++;
    }

    // Printing happens only after all predictions were computed successfully.
    for (double prediction : predictions) {
        System.out.print(prediction);
    }
    return predictions;
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Sets the class prior probabilities/*  w w  w.j a va  2s  .  co  m*/
 * 
 * @param train the training instances used to determine the prior
 *          probabilities
 * @throws Exception if the class attribute of the instances is not set
 */
public void setPriors(Instances train) throws Exception {
    m_NoPriors = false;

    if (!m_ClassIsNominal) {

        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;

        for (int i = 0; i < train.numInstances(); i++) {
            Instance currentInst = train.instance(i);
            if (!currentInst.classIsMissing()) {
                addNumericTrainClass(currentInst.classValue(), currentInst.weight());
            }
        }

    } else {
        for (int i = 0; i < m_NumClasses; i++) {
            m_ClassPriors[i] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int i = 0; i < train.numInstances(); i++) {
            if (!train.instance(i).classIsMissing()) {
                m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight();
                m_ClassPriorsSum += train.instance(i).weight();
            }
        }
    }
}

From source file:boostingPL.boosting.AdaBoost.java

License:Open Source License

/**
 * Creates an AdaBoost instance over the given data.
 *
 * Stores the data set, allocates one classifier slot and one classifier weight per
 * iteration, and sets the weight of every instance in {@code insts} to
 * {@code 1 / n}, where n is the number of instances. The passed-in instances are
 * modified in place.
 *
 * @param insts the training instances
 * @param numInterations the number of boosting iterations
 */
public AdaBoost(Instances insts, int numInterations) {
    this.insts = insts;
    this.numIterations = numInterations;
    this.classifiers = new Classifier[numInterations];
    this.cweights = new double[numInterations];

    // Uniform starting weights.
    final int total = insts.numInstances();
    final double uniformWeight = 1.0 / total;
    for (int idx = 0; idx < total; idx++) {
        insts.instance(idx).setWeight(uniformWeight);
    }
}

From source file:boostingPL.boosting.SAMME.java

License:Open Source License

/**
 * Creates a SAMME instance over the given data.
 *
 * Keeps a reference to the data set, sizes the classifier and classifier-weight
 * arrays to the number of iterations, and gives each instance in {@code insts}
 * the same weight {@code 1 / n} (n = number of instances). The weights are written
 * directly onto the supplied instances.
 *
 * @param insts the training instances
 * @param numInterations the number of boosting iterations
 */
public SAMME(Instances insts, int numInterations) {
    this.insts = insts;
    this.numIterations = numInterations;
    this.classifiers = new Classifier[numInterations];
    this.cweights = new double[numInterations];

    // Every instance starts with an equal share of the total weight.
    final int instanceCount = insts.numInstances();
    final double initialWeight = 1.0 / instanceCount;
    int position = 0;
    while (position < instanceCount) {
        insts.instance(position).setWeight(initialWeight);
        position++;
    }
}

From source file:br.com.edu.arff.LoadArff.java

/**
 * Loads an ARFF file and builds one {@code Cluster} entry per instance.
 *
 * Each entry combines the instance's value of the attribute named "Cluster" with
 * the URI found at the same position in the list returned by
 * {@code Fuseki.buscaURIS()}.
 *
 * @param caminho path of the ARFF file to read
 * @return the list of clusters, in instance order
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if reading or closing the file fails
 */
public ArrayList<Cluster> carregarArff(String caminho) throws FileNotFoundException, IOException {
    Instances data;
    BufferedReader reader = new BufferedReader(new FileReader(caminho));
    try {
        // The ArffReader(Reader) constructor reads the complete data set, so the
        // reader is no longer needed once getData() has returned.
        ArffReader arff = new ArffReader(reader);
        data = arff.getData();
    } finally {
        // Always release the file handle, even when parsing fails.
        reader.close();
    }
    data.setClassIndex(data.numAttributes() - 1);
    Attribute att = data.attribute("Cluster");

    ArrayList<Cluster> lista = new ArrayList<Cluster>();

    Fuseki fuseki = new Fuseki();
    ArrayList<String> uris = fuseki.buscaURIS();

    // NOTE(review): assumes buscaURIS() returns at least one URI per instance, in
    // the same order as the instances - confirm against the Fuseki data.
    for (int i = 0; i < data.numInstances(); i++) {
        Cluster cluster = new Cluster();
        String clusters = String.valueOf(data.get(i).stringValue(att));
        cluster.setUri(uris.get(i));
        cluster.setGrupo(clusters);
        lista.add(cluster);
    }

    return lista;
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java

License:Open Source License

/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector.
 *
 * Steps, in order: copy the data and drop instances with a missing class, shuffle,
 * record two differing class values, replace missing values, remove useless
 * attributes, binarize nominal attributes, normalize, run k-means on the data
 * without its class attribute, then lay out and fill {@code m_RBFParameters}.
 *
 * @param data the training data; the caller's object is not modified because a
 *          copy is made first
 * @return the pre-processed data (after all filters have been applied); if only
 *         the class attribute remains, the data as it was when the ZeroR model
 *         was built
 * @throws Exception if the capability check fails, if all class values are the
 *           same, or if a filter or the clusterer fails
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    // Find the first instance whose class value differs from that of instance 0.
    // NOTE(review): data.instance(0) is accessed without an emptiness check - confirm
    // the capability test above rules out an empty data set at this point.
    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values   
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);
    // m_x1 (slope) and m_x0 (intercept) satisfy y = m_x1 * z + m_x0 for the two
    // reference instances, where y is the class value before and z the class value
    // after filtering.
    // NOTE(review): reusing 'index' assumes the filters above keep instance order
    // and count unchanged - confirm.
    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    // The class index is unset only while the Remove filter is configured and
    // applied; it is restored right after so 'data' keeps its class attribute.
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    // Fewer centroids than requested units: shrink the number of units to match.
    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    // Layout of m_RBFParameters: (m_numUnits + 1) * m_numClasses slots from
    // OFFSET_WEIGHTS, then m_numAttributes attribute weights (only if enabled),
    // then m_numUnits * m_numAttributes center coordinates, then the scales.
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    // The number of scale slots depends on the scale option: one global scale,
    // one per unit and attribute, or (default) one per unit.
    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    // For each center i, minDist is the smallest squared distance to a center with
    // a higher index; maxMinDist is the largest such value over all i.
    // NOTE(review): with a single center the inner loop never runs, maxMinDist
    // stays -1 and Math.sqrt(maxMinDist) below yields NaN - confirm intended.
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        // j indexes attributes of 'data' (class slot skipped, left at its default);
        // k indexes attributes of 'centers', which were built without the class
        // attribute, so k advances only when a non-class value is copied.
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    // Attribute weights, when enabled, start at 1.0 for every non-class attribute.
    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

/**
 * Prints the data set to standard output as a matrix: one line per instance,
 * with every attribute value followed by a single space.
 *
 * @param data the instances to print
 */
public static void print_dataset_as_matrix(Instances data) {
    final int rowCount = data.numInstances();
    for (int row = 0; row < rowCount; row++) {
        int col = 0;
        while (col < data.numAttributes()) {
            System.out.print(data.instance(row).value(col) + " ");
            col++;
        }
        // End of the current instance's row.
        System.out.println();
    }
}