Example usage for weka.core.Instances Instances(Instances)

List of usage examples for the weka.core.Instances constructor Instances(Instances)

Introduction

On this page you can find example usage for the weka.core.Instances copy constructor, Instances(Instances).

Prototype

public Instances(Instances dataset) 

Document

Constructor copying all instances and references to the header information from the given set of instances.
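
As a minimal standalone sketch of what this means in practice (the class name CopyExample and the file example.arff are placeholders), the copy constructor can be used to duplicate a dataset before modifying it:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instances;

public class CopyExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file (placeholder path)
        Instances original = new Instances(new BufferedReader(new FileReader("example.arff")));

        // Copy every instance; the header information is reused from the original
        Instances copy = new Instances(original);

        // Modifying the copy leaves the original set of instances untouched
        copy.delete(0);
        System.out.println("original: " + original.numInstances() + " instances");
        System.out.println("copy:     " + copy.numInstances() + " instances");
    }
}

Because the data is copied rather than shared, deleting or reordering instances in the copy does not affect the original, which is why most of the examples below copy their input with new Instances(...) before randomizing it, deleting attributes, or setting the class index.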

Usage

From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java

License:Apache License

public Map<String, String> makePrediction(String username, FileInputStream trainingModel,
        FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();

    // Deserialize the previously trained classifier
    ObjectInputStream inputStream = new ObjectInputStream(trainingModel);
    weka.classifiers.Classifier classifier = (weka.classifiers.Classifier) inputStream.readObject();
    inputStream.close();

    // Read the test instances from the supplied ARFF reader
    Instances instances = new Instances(new BufferedReader(testingDataset));

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    int last = instances.numInstances() - 1;

    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }

    return predictions;
}

From source file:categorization.SpectralWEKA.java

License:Open Source License

/**
 * Generates a clusterer by the mean of spectral clustering algorithm.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws java.lang.Exception {
    m_Sequences = new Instances(data);
    int n = data.numInstances();
    int k = data.numAttributes();
    DoubleMatrix2D w;
    if (useSparseMatrix)
        w = DoubleFactory2D.sparse.make(n, n);
    else
        w = DoubleFactory2D.dense.make(n, n);
    double[][] v1 = new double[n][];
    for (int i = 0; i < n; i++)
        v1[i] = data.instance(i).toDoubleArray();
    v = DoubleFactory2D.dense.make(v1);
    double sigma_sq = sigma * sigma;
    //Sets up similarity matrix
    for (int i = 0; i < n; i++)
        for (int j = i; j < n; j++) {
            /*double dist = distnorm2(v.viewRow(i), v.viewRow(j));
            if((r == -1) || (dist < r)) {
              double sim = Math.exp(- (dist * dist) / (2 * sigma_sq));
              w.set(i, j, sim);
              w.set(j, i, sim);
            }*/
            /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)};
             System.out.println(key[0]);
             System.out.println(key[1]);
             System.out.println(simScoreMap.containsKey(key));
             Double simValue = simScoreMap.get(key);*/

            double sim = sim_matrix[i][j];
            w.set(i, j, sim);
            w.set(j, i, sim);
        }

    //Partitions points
    int[][] p = partition(w, alpha_star);

    //Deploys results
    numOfClusters = p.length;
    cluster = new int[n];
    for (int i = 0; i < p.length; i++)
        for (int j = 0; j < p[i].length; j++)
            cluster[p[i][j]] = i;

    //System.out.println("Final partition:");
    // UtilsJS.printMatrix(p);
    // System.out.println("Cluster:\n");
    // UtilsJS.printArray(cluster);
    this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1;
    //  System.out.println("Num clusters:\t"+this.numOfClusters);
}

From source file:cba.Apriori.java

License:Open Source License

/**
 * Method that generates all large itemsets with a minimum support, and from
 * these all association rules with a minimum confidence.
 *
 * @param instances the instances to be used for generating the associations
 * @throws Exception if rules can't be built successfully
 */
public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;

    instances = new Instances(instances);

    if (m_removeMissingCols) {
        instances = removeMissingColumns(instances);
    }
    if (m_car && m_metricType != CONFIDENCE)
        throw new Exception("For CAR-Mining metric type has to be confidence!");

    // only set class index if CAR is requested
    if (m_car) {
        if (m_classIndex == -1) {
            instances.setClassIndex(instances.numAttributes() - 1);
        } else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
            instances.setClassIndex(m_classIndex - 1);
        } else {
            throw new Exception("Invalid class index.");
        }
    }

    // can associator handle the data?
    getCapabilities().testWithFail(instances);

    m_cycles = 0;
    if (m_car) {
        //m_instances does not contain the class attribute
        m_instances = LabeledItemSet.divide(instances, false);

        //m_onlyClass contains only the class attribute
        m_onlyClass = LabeledItemSet.divide(instances, true);
    } else
        m_instances = instances;

    if (m_car && m_numRules == Integer.MAX_VALUE) {
        // Set desired minimum support
        m_minSupport = m_lowerBoundMinSupport;
    } else {
        // Decrease minimum support until desired number of rules found.
        m_minSupport = m_upperBoundMinSupport - m_delta;
        m_minSupport = (m_minSupport < m_lowerBoundMinSupport) ? m_lowerBoundMinSupport : m_minSupport;
    }

    do {

        // Reserve space for variables
        m_Ls = new FastVector();
        m_hashtables = new FastVector();
        m_allTheRules = new FastVector[6];
        m_allTheRules[0] = new FastVector();
        m_allTheRules[1] = new FastVector();
        m_allTheRules[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3] = new FastVector();
            m_allTheRules[4] = new FastVector();
            m_allTheRules[5] = new FastVector();
        }
        sortedRuleSet = new FastVector[6];
        sortedRuleSet[0] = new FastVector();
        sortedRuleSet[1] = new FastVector();
        sortedRuleSet[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            sortedRuleSet[3] = new FastVector();
            sortedRuleSet[4] = new FastVector();
            sortedRuleSet[5] = new FastVector();
        }
        if (!m_car) {
            // Find large itemsets and rules
            findLargeItemSets();
            if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
                findRulesBruteForce();
            else
                findRulesQuickly();
        } else {
            findLargeCarItemSets();
            findCarRulesQuickly();
        }

        // Sort rules according to their support
        /* supports = new double[m_allTheRules[2].size()];
         for (int i = 0; i < m_allTheRules[2].size(); i++) 
        supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support();
         indices = Utils.stableSort(supports);
         for (int i = 0; i < m_allTheRules[2].size(); i++) {
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
        sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
        sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
        }
         }*/

        int j = m_allTheRules[2].size() - 1;
        supports = new double[m_allTheRules[2].size()];
        for (int i = 0; i < (j + 1); i++)
            supports[j - i] = ((double) ((ItemSet) m_allTheRules[1].elementAt(j - i)).support()) * (-1);
        indices = Utils.stableSort(supports);
        for (int i = 0; i < (j + 1); i++) {
            sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j - i]));
            sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j - i]));
            sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j - i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j - i]));
                sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j - i]));
                sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j - i]));
            }
        }

        // Sort rules according to their confidence
        m_allTheRules[0].removeAllElements();
        m_allTheRules[1].removeAllElements();
        m_allTheRules[2].removeAllElements();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3].removeAllElements();
            m_allTheRules[4].removeAllElements();
            m_allTheRules[5].removeAllElements();
        }
        confidences = new double[sortedRuleSet[2].size()];
        int sortType = 2 + m_metricType;

        for (int i = 0; i < sortedRuleSet[2].size(); i++)
            confidences[i] = ((Double) sortedRuleSet[sortType].elementAt(i)).doubleValue();
        indices = Utils.stableSort(confidences);
        for (int i = sortedRuleSet[0].size() - 1; (i >= (sortedRuleSet[0].size() - m_numRules))
                && (i >= 0); i--) {
            m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
            m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
            m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
                m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
                m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
            }
        }

        if (m_verbose) {
            if (m_Ls.size() > 1) {
                System.out.println(toString());
            }
        }
        if (m_minSupport == m_lowerBoundMinSupport || m_minSupport - m_delta > m_lowerBoundMinSupport)
            m_minSupport -= m_delta;
        else
            m_minSupport = m_lowerBoundMinSupport;

        necSupport = Math.round((float) ((m_minSupport * (double) m_instances.numInstances()) + 0.5));

        m_cycles++;
    } while ((m_allTheRules[0].size() < m_numRules) && (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
    /*        (necSupport >= lowerBoundNumInstancesSupport)*/
    /*        (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */ && (necSupport >= 1));
    m_minSupport += m_delta;
}

From source file:cba.AssociatorEvaluation.java

License:Open Source License

/**
 * Evaluates the associator with the given commandline options and returns
 * the evaluation string.
 * 
 * @param associator   the Associator to evaluate
 * @param options   the commandline options
 * @return      the generated output string
 * @throws Exception   if evaluation fails
 */
public static String evaluate(Associator associator, String[] options) throws Exception {

    String trainFileString = "";
    String graphFileName = "";
    AssociatorEvaluation eval;
    DataSource loader;

    // help?
    if (Utils.getFlag('h', options))
        throw new Exception("\nHelp requested.\n" + makeOptionString(associator));

    try {
        // general options
        trainFileString = Utils.getOption('t', options);
        if (trainFileString.length() == 0)
            throw new Exception("No training file given!");
        loader = new DataSource(trainFileString);

        graphFileName = Utils.getOption('g', options);

        // associator specific options
        if (associator instanceof OptionHandler) {
            ((OptionHandler) associator).setOptions(options);
        }

        // left-over options?
        Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
        throw new Exception("\nWeka exception: " + e.getMessage() + "\n" + makeOptionString(associator));
    }

    // load file and build associations
    eval = new AssociatorEvaluation();
    String results = eval.evaluate(associator, new Instances(loader.getDataSet()));

    // If associator is drawable output string describing graph
    if ((associator instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) associator).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return results;
}

From source file:CEP.GenerateStream.java

public void MakeStream() {
    File file = new File("C:\\Users\\Weary\\Documents\\w4ndata\\w4ndata.arff");
    String pc = System.getProperty("user.dir").toString();
    if (pc.contains("gs023850")) {
        file = new File("C:\\Users\\gs023850\\Documents\\w4ndata\\w4ndata.arff");
    }
    try {
        ArffLoader loader = new ArffLoader();
        loader.setFile(file);
        Instances structure = loader.getStructure();
        int j = structure.numAttributes();

        HeaderManager.SetStructure(new Instances(structure));
        Instance current;
        long previousTimeStamp = 0;
        String timeStamp = "0";
        long wait = 0;

        while ((current = loader.getNextInstance(structure)) != null) {
            timeStamp = current.stringValue(0);
            cepRT.sendEvent(current);
            System.out.println("Sending event");
            previousTimeStamp = WaitTime(timeStamp, previousTimeStamp, wait);
        }
    } catch (Exception e) {
        if (e instanceof FileNotFoundException) {
            System.out.println("File not found - could not generate stream");
            return;
        } else if (e instanceof IOException) {
            System.out.println("Unable to read file");
        } else if (e instanceof NumberFormatException) {
            System.out.println("Unable to convert time to number - bad time");
        } else {
            System.out.println(e.toString());
        }
    }
}

From source file:cezeri.evaluater.FactoryEvaluation.java

public static Evaluation performCrossValidate(Classifier model, Instances datax, int folds, boolean show_text,
        boolean show_plot, TFigureAttribute attr) {
    Random rand = new Random(1);
    Instances randData = new Instances(datax);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }
    Evaluation eval = null;
    try {
        // perform cross-validation
        eval = new Evaluation(randData);
        //            double[] simulated = new double[0];
        //            double[] observed = new double[0];
        //            double[] sim = new double[0];
        //            double[] obs = new double[0];
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances validation = randData.testCV(folds, n);
            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);

            //                sim = eval.evaluateModel(clsCopy, validation);
            //                obs = validation.attributeToDoubleArray(validation.classIndex());
            //                if (show_plot) {
            //                    double[][] d = new double[2][sim.length];
            //                    d[0] = obs;
            //                    d[1] = sim;
            //                    CMatrix f1 = CMatrix.getInstance(d);
            //                    f1.transpose().plot(attr);
            //                }
            //                if (show_text) {
            //                    // output evaluation
            //                    System.out.println();
            //                    System.out.println("=== Setup for each Cross Validation fold===");
            //                    System.out.println("Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            //                    System.out.println("Dataset: " + randData.relationName());
            //                    System.out.println("Folds: " + folds);
            //                    System.out.println("Seed: " + 1);
            //                    System.out.println();
            //                    System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
            //                }
            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
            //                simulated = FactoryUtils.mean(simulated,eval.evaluateModel(clsCopy, validation));
            //                observed = FactoryUtils.mean(observed,validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test,
        boolean show_text, boolean show_plot) {
    TFigureAttribute attr = new TFigureAttribute();
    Random rand = new Random(1);
    Instances randData = new Instances(datax);
    randData.randomize(rand);

    Evaluation eval = null;
    int folds = randData.numInstances();
    try {
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            //                randData.randomize(rand);
            //                Instances train = randData;                
            Instances train = randData.trainCV(folds, n);
            //                Instances train = randData.trainCV(folds, n, rand);
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            Instances validation = randData.testCV(folds, n);
            //                Instances validation = test.testCV(test.numInstances(), n%test.numInstances());
            //                CMatrix.fromInstances(train).showDataGrid();
            //                CMatrix.fromInstances(validation).showDataGrid();

            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

private static Evaluation doTest(boolean isTrained, Classifier model, Instances train, Instances test,
        boolean show_text, boolean show_plot, TFigureAttribute attr) {
    Instances data = new Instances(train);
    Random rand = new Random(1);
    data.randomize(rand);
    Evaluation eval = null;
    try {
        //            double[] simulated = null;
        eval = new Evaluation(train);
        if (isTrained) {
            simulated = eval.evaluateModel(model, test);
        } else {
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            simulated = eval.evaluateModel(clsCopy, test);
        }
        if (show_plot) {
            observed = test.attributeToDoubleArray(test.classIndex());
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            String[] items = { "Observed", "Simulated" };
            attr.items = items;
            attr.figureCaption = model.getClass().getCanonicalName();
            f1.transpose().plot(attr);
            //                if (attr.axis[0].isEmpty() && attr.axis[1].isEmpty()) {
            //                    f1.transpose().plot(attr);
            //                } else {
            //                    f1.transpose().plot(model.getClass().getCanonicalName(), attr.items, attr.axis);
            //                }
        }
        if (show_text) {
            System.out.println();
            System.out.println("=== Setup for Test ===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + test.relationName());
            System.out.println();
            System.out.println(eval.toSummaryString("=== Test Results ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) {
    Influence[] ret = null;
    try {
        Instances data = DataSource.read(filePath);
        ret = new Influence[data.numAttributes() - 1];
        data.setClassIndex(data.numAttributes() - 1);
        // other options
        int seed = 1;
        int folds = 10;
        // randomize data
        Instances randData = new Instances(data);
        Random rand = new Random(seed);
        randData.randomize(rand);
        Evaluation evalBase = getEvaluation(randData, model, folds);
        double accBase = evalBase.correct() / evalBase.numInstances() * 100;
        double nf = randData.numAttributes();

        for (int j = 0; j < nf - 1; j++) {
            ret[j] = new Influence();
            String str = randData.attribute(j).name();
            Attribute att = randData.attribute(j);
            randData.deleteAttributeAt(j);
            Evaluation evalTemp = getEvaluation(randData, model, folds);
            double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100;
            double tempInfluence = accBase - accTemp;
            ret[j].attributeName = str;
            ret[j].infVal = tempInfluence;
            randData.insertAttributeAt(att, j);
        }
        sortInfluenceArray(ret);
    } catch (Exception ex) {
        Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static Instances getSubsetData(Instances data, String[] attList) {
    Instances temp = new Instances(data);
    for (int i = 0; i < data.numAttributes(); i++) {
        if (!temp.attribute(0).equals(temp.classAttribute())) {
            temp.deleteAttributeAt(0);
        }
    }
    double[][] m = new double[attList.length + 1][data.numInstances()];
    for (int i = 0; i < attList.length; i++) {
        int n = attList.length - 1 - i;
        String str = attList[n];
        Attribute t = data.attribute(str);
        double[] d = data.attributeToDoubleArray(t.index());
        m[n] = d;
        temp.insertAttributeAt(t, 0);
    }
    m[attList.length] = data.attributeToDoubleArray(data.classIndex());
    m = CMatrix.getInstance(m).transpose().get2DArrayDouble();

    FastVector att = new FastVector();
    for (int i = 0; i < temp.numAttributes(); i++) {
        att.addElement(temp.attribute(i));
    }
    Instances ret = new Instances(temp.relationName(), att, m.length);
    for (int i = 0; i < m.length; i++) {
        Instance ins = new Instance(m[0].length);
        for (int j = 0; j < m[0].length; j++) {
            ins.setValue(j, m[i][j]);
        }
        ret.add(ins);
    }
    ret.setClassIndex(temp.classIndex());

    return ret;
}