List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:lascer.WekaClassifier.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * Parses the command-line options (formula kind/class, logging, pruning
 * quotas), converts the weka training data into the project's extended
 * Instances representation, derives the positive/negative example sets,
 * generates a formula for the requested class(es), and optionally re-learns
 * on pruned data.
 *
 * @param data the data to be used.
 *
 * @exception Exception if the classifier can't built successfully.
 */
public void buildClassifier(Instances data) throws Exception {
    weka.coreExtended.Instances extendedInstances;
    weka.coreExtended.BasicInstance extInst;
    weka.coreExtended.BasicAttribute classAttribut;
    de.unistuttgart.commandline.Option formelnArtOption;   // option: kind of formula
    de.unistuttgart.commandline.Option formelnKlasseOption; // option: class of formula
    de.unistuttgart.commandline.Option loggingSwitch;
    Instance readInst;
    Beispieldaten invDatensatz; // NOTE(review): declared but never used in this method
    StringReader stringReader;
    Enumeration instEnum;
    Enumeration attribEnum;
    PraedErzParameter praedErzParameter = null;
    KonzErzParameter konzErzParameter = null;
    Pruning pruning;
    String formelArt;    // kind of formula: "dis", "kon" or "beste" (best)
    String formelKlasse; // class to generate a formula for: "pos", "neg", "beste" or "beide" (both)
    String optionWert;
    float posPruneAnt, negPruneAnt;
    int instNumber;
    boolean unbekannteWertBsp; // whether examples with unknown values are allowed

    Steuerung.parseArguments(parser);

    // Determine the kind of formula, defaulting to the project constant.
    formelArt = Konstanten.WEKA_FORMEL_ART;
    formelnArtOption = parser.getOption("formelArt");
    if (parser.isEnabled(formelnArtOption)) {
        optionWert = parser.getParameter(formelnArtOption);
        if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) {
            // German: "value of option formelArt is invalid" / "valid: ..."
            System.err.println("Wert der Option formelArt unzulssig");
            System.err.println("Zulssig: " + formelnArtOption.toString());
            throw (new RuntimeException("Wert von Option unzulssig."));
        }
        formelArt = optionWert;
    }

    // Determine the class the formula is generated for, defaulting to the
    // project constant.
    formelKlasse = Konstanten.WEKA_FORMEL_KLASSE;
    formelnKlasseOption = parser.getOption("formelKlasse");
    if (parser.isEnabled(formelnKlasseOption)) {
        optionWert = parser.getParameter(formelnKlasseOption);
        if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste")
                && !optionWert.equals("beide")) {
            // German: "value of option formelKlasse is invalid" / "valid: ..."
            System.err.println("Wert der Option formelKlasse unzulssig");
            System.err.println("Zulssig: " + formelnKlasseOption.toString());
            throw (new RuntimeException("Wert von Option unzulssig."));
        }
        formelKlasse = optionWert;
    }

    loggingSwitch = parser.getOption("logging");
    if (debugMode || parser.isEnabled(loggingSwitch)) {
        Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL);
    }

    // Determine the run parameters.
    unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser);
    posPruneAnt = Steuerung.posPruneAnteil(parser);
    negPruneAnt = Steuerung.negPruneAnteil(parser);
    praedErzParameter = Steuerung.praedErzParameter(parser);
    konzErzParameter = Steuerung.konzErzParameter(parser);

    // Read the data and create the extended Instances object. The weka
    // dataset is round-tripped through its ARFF text form to build the
    // extended header, then the instances are copied over individually.
    instNumber = data.numInstances();
    stringReader = new StringReader(data.toString());
    extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber);
    instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        readInst = (Instance) instEnum.nextElement();
        extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray());
        extendedInstances.addBasicInstance(extInst);
    }

    // Create the data sets (fields): the negative set is an inverted copy
    // of the positive one.
    posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp);
    negDatensatz = posDatensatz.kopie(true);

    // Create the list of attributes (field).
    attributListe = new LinkedList();
    attribEnum = extendedInstances.enumerateBasicAttributes();
    while (attribEnum.hasMoreElements()) {
        attributListe.add(attribEnum.nextElement());
    }

    // Determine the index values of the classification labels (fields).
    classAttribut = extendedInstances.basicClassAttribute();
    wekaClassTrue = classAttribut.indexOfValue("true");
    wekaClassFalse = classAttribut.indexOfValue("false");

    // Generate the formula for the class of the positive examples.
    if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
        posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    // Generate the formula for the class of the negative examples.
    if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
        negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    if (formelKlasse.equals("beste")) {
        // Keep only the better of the two formulas; discard the worse one.
        if (negFormel.istBesser(posFormel)) {
            posFormel = null;
        } else {
            negFormel = null;
        }
    }

    // If pruning quotas were given, reduce the data with the learned
    // formula and re-learn each surviving formula on the reduced set.
    if ((posPruneAnt > 0) || (negPruneAnt > 0)) {
        pruning = new Pruning();
        if (posFormel != null) {
            posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt);
            posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
        if (negFormel != null) {
            negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt);
            negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
    }
}
From source file:lattice.Lattice.java
License:Open Source License
/**
 * Constructor of a lattice over the given variables of the dataset.
 *
 * Builds, for every (attribute, value) pair, the TID set (a BitSet of the
 * row indexes where that attribute takes that value), then creates the
 * lattice root and the single-variable nodes from those sets.
 *
 * @param dataset the weka dataset the lattice counts over
 */
public Lattice(Instances dataset) {
    // ~ initialise internal structure for counting (TID sets)
    this.nbInstances = dataset.numInstances();
    this.nbVariables = dataset.numAttributes();
    // presence[a][v]: rows on which attribute a has value index v.
    BitSet[][] presence = new BitSet[nbVariables][];
    TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>();
    int[] nbValuesForAttribute = new int[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        nbValuesForAttribute[a] = dataset.numDistinctValues(a) + 1; //+1 for missing
        presence[a] = new BitSet[nbValuesForAttribute[a]];
        allAttributesNumbers.add(a);
        for (int v = 0; v < presence[a].length; v++) {
            presence[a][v] = new BitSet();
        }
    }
    // One pass over the data: set one bit per (attribute, value, row).
    for (int i = 0; i < nbInstances; i++) {
        Instance row = dataset.instance(i);
        for (int a = 0; a < nbVariables; a++) {
            int indexOfValue;
            if (row.isMissing(a)) {
                // indexOfValue = (int) dataset.meanOrMode(a);
                indexOfValue = dataset.numDistinctValues(a); //missing at the end
            } else {
                // NOTE(review): stringValue/indexOfValue assumes nominal (or
                // string) attributes — confirm numeric attributes never reach
                // this constructor.
                String value = row.stringValue(a);
                indexOfValue = row.attribute(a).indexOfValue(value);
            }
            presence[a][indexOfValue].set(i);
        }
    }
    // initialise the first nodes of the lattice (i.e., the ones
    // corresponding to single variables
    this.all = new LatticeNode(this, nbValuesForAttribute);
    this.singleNodes = new LatticeNode[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        int[] variablesNumbers = { a };
        LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all);
        singleNodes[a] = node;
    }
}
From source file:lector.Analizador.java
public static void clasificador() { BufferedReader reader1;/*from w w w . j a v a 2 s . c om*/ BufferedReader reader2; try { reader1 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/" + "proyecto/compartida/DataSetAnalisisSentimientos.arff")); reader2 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/" + "proyecto/compartida/DataSetAnalisisSentimientos_inc.arff")); Instances train = new Instances(reader1); train.setClassIndex(train.numAttributes() - 1); System.out.println(train.classIndex() + " " + train.numAttributes()); Instances test = new Instances(reader2); test.setClassIndex(train.numAttributes() - 1); System.out.println(test.classIndex() + " " + test.numAttributes()); NaiveBayes model = new NaiveBayes(); model.buildClassifier(train); //classify Instances labeled = new Instances(test); for (int i = 0; i < test.numInstances(); i++) { double clsLabel = model.classifyInstance(test.instance(i)); labeled.instance(i).setClassValue(clsLabel); } // https://youtu.be/JY_x5zKTfyo?list=PLJbE6j2EG1pZnBhOg3_Rb63WLCprtyJag Evaluation eval_train = new Evaluation(test); eval_train.evaluateModel(model, test); reader1.close(); reader2.close(); //System.out.println(eval_train.toSummaryString("\nResults\n======\n", false)); String[] options = new String[4]; options[0] = "-t"; //name of training file options[1] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/" + "compartida/DataSetAnalisisSentimientos.arff"; options[2] = "-T"; options[3] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/" + "compartida/DataSetAnalisisSentimientos_inc.arff"; System.out.println(Evaluation.evaluateModel(model, options)); try ( // print classification results to file BufferedWriter writer = new BufferedWriter( new FileWriter("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/" + "proyecto/compartida/DataSetAnalisisSentimientos_labeled.arff"))) { 
writer.write(labeled.toString()); } } catch (Exception e) { } }
From source file:liac.igmn.util.MatrixUtil.java
License:Open Source License
/** * //w ww . j a va 2 s . c o m * @param data instancias carregadas pelo weka * @return matriz correspondentes as instancias */ public static SimpleMatrix instancesToMatrix(Instances data) { double dataset[][] = new double[data.numAttributes()][data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { double[] B = data.instance(i).toDoubleArray(); for (int j = 0; j < data.numAttributes(); j++) dataset[j][i] = B[j]; } return new SimpleMatrix(dataset); }
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering/*from w w w .jav a2s . co m*/ * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * Expectation Maximization clustering.
 *
 * The number of clusters is chosen by weka's EM/ClusterEvaluation (via
 * cross-validation), not passed in. Centroids and per-feature standard
 * deviations are recomputed from the hard cluster assignments.
 *
 * @param data - matrix of observations (numObs x numFeatures)
 * @return one Cluster per discovered cluster, with centroid, members and
 *         per-feature standard deviation filled in
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    EM clusterer = new EM();
    try {
        clusterer.buildClusterer(ds);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(new Instances(ds));
        int numClusters = eval.getNumClusters();
        Cluster[] clusters = new Cluster[numClusters];
        double[][] clusterCentroids = new double[numClusters][numFeatures];
        int[] clusterCount = new int[numClusters];
        double[] assignments = eval.getClusterAssignments();
        // First pass: accumulate per-cluster feature sums and member counts.
        for (int i = 0; i < ds.numInstances(); i++) {
            Instance inst = ds.instance(i);
            int clusterId = (int) assignments[i];
            for (int j = 0; j < numFeatures; j++) {
                clusterCentroids[clusterId][j] += inst.value(j);
            }
            clusterCount[clusterId]++;
        }
        // Turn the sums into means.
        // NOTE(review): divides by clusterCount[i]; an empty cluster would
        // yield NaN centroids — confirm EM never reports an empty cluster.
        for (int i = 0; i < numClusters; i++) {
            double[] mean = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                mean[j] = clusterCentroids[i][j] / clusterCount[i];
            }
            clusters[i] = new Cluster(mean, i);
        }
        // cluster members & std dev
        // Second pass: register members and accumulate squared deviations
        // from the centroid computed above.
        double[][] clusterStdDev = new double[numClusters][numFeatures];
        for (int i = 0; i < ds.numInstances(); i++) {
            int clusterId = (int) assignments[i];
            clusters[clusterId].addMember(i);
            for (int j = 0; j < numFeatures; j++) {
                clusterStdDev[clusterId][j] += Math
                        .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2);
            }
        }
        // Population standard deviation (divides by the member count).
        for (int i = 0; i < numClusters; i++) {
            double[] dev = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]);
            }
            clusters[i].setStdDev(dev);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
From source file:linqs.gaia.model.oc.ncc.WekaClassifier.java
License:Open Source License
/** * Predict single item but specify whether or not to save weka * test file for the single item.// w ww .ja v a 2 s .c o m * * @param testitem Single test item * @param savewekatestfile True to save weka test file and false otherwise * @return Predicted value */ private CategValue predictSingleItem(Decorable testitem, boolean savewekatestfile) { CategValue cvalue = null; Instances testinstances = gaia2weka(testitem, true); try { if (savewekatestfile) { String savefile = this.getStringParameter("wekatestfile"); this.saveWekaInstances(savefile + "-" + testitem, testinstances); } int numinstances = testinstances.numInstances(); if (numinstances != 1) { throw new InvalidStateException("Only one predicted item should ever be returned"); } Instance inst = testinstances.instance(0); double prob[] = this.wekaclassifier.distributionForInstance(inst); // Can just take maximum. This is equivalent to what Weka does // to classify the instance. This saves the cost of recomputing. int pred = ArrayUtils.maxValueIndex(prob); cvalue = new CategValue(this.targetcategories.get(pred), prob); } catch (Exception e) { throw new RuntimeException(e); } return cvalue; }
From source file:LogReg.Logistic.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * Pipeline: capability check, missing-class removal, missing-value
 * replacement, useless-attribute removal, nominal-to-binary transform,
 * then extraction and normalisation of the data matrix and maximisation of
 * the multinomial logistic log-likelihood via the OptEng optimizer. The
 * fitted coefficients are de-normalised back into original attribute units.
 *
 * @param train the training data to be used for generating the
 *              boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class (work on a copy).
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values (column 0 is the intercept)
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's (field; reused after fitting)
    double[] sY = new double[nK + 1]; // Per-class (weighted) case counts
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }
    // Copy each instance into m_Data, skipping the class column, while
    // accumulating weighted sums for mean/stddev.
    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];
        m_Data[i][0] = 1; // intercept term
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }
        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    // Finalize weighted means and standard deviations; the intercept column
    // keeps mean 0 / sd 1 so normalisation leaves it untouched.
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable Avg SD ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    // Flat parameter vector: nK blocks of (nR + 1) coefficients each.
    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize: intercepts from class-frequency log-odds, slopes at zero;
    // NaN bounds mean "unconstrained" to the optimizer.
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        // findArgmin returns null when its internal iteration budget is hit
        // before convergence; resume from the current values until it converges.
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}
From source file:lu.lippmann.cdb.common.gui.dataset.InstanceTableModel.java
License:Open Source License
public void setDataset(final Instances dataSet) { this.dataSet = dataSet; final int numInstances = dataSet.numInstances(); final ArrayList<Instance> pdata = new ArrayList<Instance>(numInstances); //Initialize rows Id final int dsSize = dataSet.numInstances(); this.rows = new ArrayList<Integer>(dsSize); for (int i = 0; i < dsSize; i++) { rows.add(i);/*from ww w . j a va2s .c o m*/ } //Initialize data for (int i = 0; i < numInstances; i++) { pdata.add(dataSet.instance(i)); } super.setData(pdata); }
From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java
License:Open Source License
private static void fillWithSingleAxis(final Instances dataSet, final int dateIdx, final TimeSeriesCollection tsDataset) { final int numInstances = dataSet.numInstances(); final Calendar cal = Calendar.getInstance(); for (final Integer i : WekaDataStatsUtil.getNumericAttributesIndexes(dataSet)) { if (dataSet.attributeStats(i).missingCount == dataSet.numInstances()) { System.out.println("TimeSeriesChartUtil: Only missing values for '" + dataSet.attribute(i).name() + "', so skip it!"); continue; }//from ww w . j a v a 2 s .co m final TimeSeries ts = new TimeSeries(dataSet.attribute(i).name()); for (int k = 0; k < numInstances; k++) { final Instance instancek = dataSet.instance(k); final long timeInMilliSec = (long) instancek.value(dateIdx); cal.setTimeInMillis(timeInMilliSec); if (instancek.isMissing(i)) { ts.addOrUpdate(new Millisecond(cal.getTime()), null); } else { ts.addOrUpdate(new Millisecond(cal.getTime()), instancek.value(i)); } } if (!ts.isEmpty()) tsDataset.addSeries(ts); } }