List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:DiversifyQuery.DivTopK.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly//from ww w. j a v a2 s .co m */ protected Instances determineOutputFormat(Instances inputFormat, ArrayList<LegacyShapelet> shapelets) throws Exception { //Set up instances size and format. //int length = this.numShapelets; int length = shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly/*from ww w .j av a 2s . c o m*/ */ @Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (this.numShapelets < 1) { throw new Exception( "ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1"); } //Set up instances size and format. //int length = this.numShapelets; int length = this.shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
@Override public Instances process(Instances data) throws Exception { if (this.numShapelets < 1) { throw new Exception( "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets"); }/*w w w . j a va 2 s . c om*/ int maxPossibleLength = data.instance(0).numAttributes() - 1; if (data.classIndex() < 0) { throw new Exception("Require that the class be set for the ShapeletTransform"); } if (this.minShapeletLength < 1 || this.maxShapeletLength < 1 || this.maxShapeletLength < this.minShapeletLength || this.maxShapeletLength > maxPossibleLength) { throw new Exception("Shapelet length parameters initialised incorrectly"); } //Sort data in round robin order dataSourceIDs = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { dataSourceIDs[i] = i; } // data = roundRobinData(data, dataSourceIDs); if (this.shapeletsTrained == false) { // shapelets discovery has not yet been caried out, so do so this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength, this.maxShapeletLength); // get k shapelets ATTENTION this.shapeletsTrained = true; if (!supressOutput) { System.out.println(shapelets.size() + " Shapelets have been generated"); } } Instances output = determineOutputFormat(data); // for each data, get distance to each shapelet and create new instance for (int i = 0; i < data.numInstances(); i++) { // for each data Instance toAdd = new Instance(this.shapelets.size() + 1); int shapeletNum = 0; for (LegacyShapelet s : this.shapelets) { double dist = subseqDistance(s.content, data.instance(i)); toAdd.setValue(shapeletNum++, dist); } toAdd.setValue(this.shapelets.size(), data.instance(i).classValue()); output.add(toAdd); } return output; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
public ArrayList<LegacyShapelet> findDiversityTopKShapelets(int numShapelets, Instances data, int minShaepeletLength, int maxShapeletLength) throws Exception { ArrayList<LegacyShapelet> kShapelets = new ArrayList<LegacyShapelet>(); //store up to k shapeles overall ArrayList<LegacyShapelet> tempKShapelets; //store temporary k shapelets each iteration ArrayList<LegacyShapelet> seriesShapelets = new ArrayList<LegacyShapelet>(); //store all temporary k shapelets each itreration for diversifying process int saxLENGTH = 15; int w = 4;/*from w w w . j a v a 2 s . co m*/ int R = 10; double percentMask = 0.25; int topK = 10; TreeMap<Double, Integer> classDistributions = getClassDistributions(data); //calc info gain//calc info gain//calc info gain//calc info gain int numClass = classDistributions.size(); if (!supressOutput) { System.out.println("Processing data: "); } int numInstances = data.numInstances(); for (int length = minShaepeletLength; length <= maxShapeletLength; length++) { createSAXList(length, saxLENGTH, w, data); randomProjection(R, percentMask, saxLENGTH); scoreAllSAX(R, numClass, data); tempKShapelets = findBestTopKSAX(length, topK, data, numClass); for (int i = 0; i < tempKShapelets.size(); i++) { seriesShapelets.add(tempKShapelets.get(i)); } USAXMap.clear(); scoreList.clear(); } ArrayList<GraphNode> Graph = new ArrayList<GraphNode>(); Graph = constructShapeletGraph(seriesShapelets, data); kShapelets = DiversifyTopKQuery(Graph, numShapelets); return kShapelets; // return seriesShapelets; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
public ArrayList<LegacyShapelet> findBestTopKSAX(int subsequenceLength, int top_k, Instances data, int numClass) { int numObject = data.numInstances(); ArrayList<Point> Dist = new ArrayList<>(numObject); int word;//from w w w . j a v a2 s . c o m int kk; double gain, distanceThreshold, gap; int qObject, qPosition; USAXElmentType usax; TreeMap<Double, Integer> classDistributions = getClassDistributions(data); // used to calc info gain double[] candidate = new double[subsequenceLength]; ArrayList<LegacyShapelet> shapelets = new ArrayList<LegacyShapelet>(); if (top_k > 0) { Collections.sort(scoreList, new Comparator<Map.Entry<Integer, Double>>() { @Override public int compare(Map.Entry<Integer, Double> a, Map.Entry<Integer, Double> b) { return ((Double) b.getValue()).compareTo((Double) a.getValue()); } }); } for (int k = 0; k < Math.min(top_k, (int) scoreList.size()); k++) { word = scoreList.get(k).getKey(); usax = USAXMap.get(word); for (kk = 0; kk < Math.min((int) usax.SAXIdArrayList.size(), 1); kk++) { qObject = usax.SAXIdArrayList.get(kk).x; qPosition = usax.SAXIdArrayList.get(kk).y; for (int i = 0; i < subsequenceLength; i++) { candidate[i] = data.instance(qObject).value(qPosition + i); } candidate = zNorm(candidate, false); LegacyShapelet candidateShapelet = checkCandidate(candidate, data, qObject, qPosition, classDistributions, null); shapelets.add(candidateShapelet); } } return shapelets; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Builds the SAX word map (USAXMap) for all subsequences of the given length,
 * using an incremental sliding window: running sums (ex, ex2) give the mean/std
 * of the current window, and per-segment sums (sumSegment) are updated in O(saxLength)
 * per shift instead of being recomputed.
 *
 * NOTE(review): w and saxLength are recomputed from subsequenceLength on entry,
 * so the passed-in values only seed the calculation.
 *
 * @param subsequenceLength length of the sliding window
 * @param saxLength desired number of SAX segments (recomputed to fit w)
 * @param w segment width (recomputed as ceil(subsequenceLength / saxLength))
 * @param data the data set whose series are scanned
 */
protected void createSAXList(int subsequenceLength, int saxLength, int w, Instances data) {
    // Re-derive segment width and word length so that saxLength * w covers the window.
    w = (int) Math.ceil((double) subsequenceLength / saxLength);
    saxLength = (int) Math.ceil((double) subsequenceLength / w);
    double ex, ex2, mean, std;
    // Per-segment running sums of the current window.
    double[] sumSegment = new double[saxLength];
    // Number of points in each segment (last segment may be shorter).
    int[] elementSegment = new int[saxLength];
    int j, jSt, k, slot, objectId;
    double dataPoint;
    int word, previousWord;
    for (k = 0; k < saxLength; k++) {
        elementSegment[k] = w;
    }
    // The final segment absorbs the remainder when w does not divide evenly.
    elementSegment[saxLength - 1] = subsequenceLength - (saxLength - 1) * w;
    for (objectId = 0; objectId < data.numInstances(); objectId++) {
        ex = ex2 = 0;
        previousWord = -1;
        for (k = 0; k < saxLength; k++) {
            sumSegment[k] = 0;
        }
        // toDoubleArray includes the class value as the last element;
        // the loops below stop before it (length - 1).
        double[] timeSeriesObject = data.instance(objectId).toDoubleArray();
        // Case 1: fill the initial window of the series.
        for (j = 0; (j < timeSeriesObject.length - 1) && (j < subsequenceLength); j++) {
            dataPoint = timeSeriesObject[j];
            ex += dataPoint;
            ex2 += dataPoint * dataPoint;
            slot = (int) Math.floor(j / w); // segment index for position j
            sumSegment[slot] += dataPoint;
        }
        // Case 2: slide the window one step at a time, updating sums incrementally.
        for (j = j; j <= timeSeriesObject.length - 1; j++) {
            jSt = j - subsequenceLength; // start index of the current window
            mean = ex / subsequenceLength;
            std = Math.sqrt(ex2 / subsequenceLength - mean * mean);
            // Create the SAX word from the per-segment sums.
            word = createSAXWord(sumSegment, elementSegment, mean, std, saxLength);
            // Numerosity reduction: only record a word when it differs from the previous one.
            if (word != previousWord) {
                previousWord = word;
                if (!(USAXMap.containsKey(word))) {
                    USAXMap.put(word, null);
                    USAXElmentType usax = new USAXElmentType();
                    usax.objectHashSet.add(objectId);
                    usax.SAXIdArrayList.add(new Point(objectId, jSt));
                    USAXMap.put(word, usax);
                } else {
                    USAXMap.get(word).objectHashSet.add(objectId);
                    USAXMap.get(word).SAXIdArrayList.add(new Point(objectId, jSt));
                }
            }
            // Prepare for the next shift: drop the outgoing point, add the incoming one,
            // and shift each segment's sum by one segment width.
            if (j < timeSeriesObject.length - 1) {
                ex -= timeSeriesObject[jSt];
                ex2 -= timeSeriesObject[jSt] * timeSeriesObject[jSt];
                for (k = 0; k < saxLength - 1; k++) {
                    sumSegment[k] -= timeSeriesObject[jSt + k * w];
                    sumSegment[k] += timeSeriesObject[jSt + (k + 1) * w];
                }
                // Last (possibly shorter) segment is clamped to the window end.
                sumSegment[k] -= timeSeriesObject[jSt + k * w];
                sumSegment[k] += timeSeriesObject[jSt + Math.min((k + 1) * w, subsequenceLength)];
                dataPoint = timeSeriesObject[j];
                ex += dataPoint;
                ex2 += dataPoint * dataPoint;
            }
        }
    }
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Protected method to check a candidate shapelet. Functions by passing in the
 * raw data, and returning an assessed ShapeletTransform object.
 *
 * @param candidate the data from the candidate ShapeletTransform
 * @param data the entire data set to compare the candidate to
 * @param seriesId series id from the dataset that the candidate came from
 * @param startPos start position in the series where the candidate came from
 * @param classDistribution a TreeMap&lt;Double, Integer&gt; in the form of
 *            &lt;Class Value, Frequency&gt; to describe the dataset composition
 * @param qualityBound optional pruning bound; pass null to disable pruning
 * @return a fully-computed ShapeletTransform, including the quality of this
 *         candidate, or null if the candidate was pruned by the quality bound
 */
protected LegacyShapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos,
        TreeMap classDistribution, QualityBound.ShapeletQualityBound qualityBound) {
    // create orderline by looping through data set and calculating the subsequence
    // distance from candidate to all data, inserting in order.
    ArrayList<OrderLineObj> orderline = new ArrayList<OrderLineObj>();
    boolean pruned = false;
    for (int i = 0; i < data.numInstances(); i++) {
        // Check if it is possible to prune the candidate before computing more distances.
        if (qualityBound != null) {
            if (qualityBound.pruneCandidate()) {
                pruned = true;
                break;
            }
        }
        double distance = 0.0;
        // The candidate's own series is assigned distance 0 by definition.
        if (i != seriesId) {
            distance = subseqDistance(candidate, data.instance(i));
        }
        double classVal = data.instance(i).classValue();
        // without early abandon, it is faster to just add and sort at the end
        orderline.add(new OrderLineObj(distance, classVal));
        // Update qualityBound - presumably each bounding method for different quality
        // measures will have a different update procedure.
        if (qualityBound != null) {
            qualityBound.updateOrderLine(orderline.get(orderline.size() - 1));
        }
    }
    // note: early abandon entropy pruning would appear here, but has been omitted
    // in favour of a clear multi-class information gain calculation. Could be added
    // in this method in the future for speed up, but distance early abandon is more important.
    // If the shapelet was pruned, it should no longer be considered in further processing.
    if (pruned) {
        return null;
    } else {
        // create a shapelet object to store all necessary info, i.e. content,
        // origin, and the assessed quality under the configured measure.
        LegacyShapelet shapelet = new LegacyShapelet(candidate, seriesId, startPos, this.qualityMeasure);
        shapelet.calculateQuality(orderline, classDistribution);
        shapelet.calcInfoGainAndThreshold(orderline, classDistribution);
        return shapelet;
    }
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Calculates the class distribution of a dataset. Main purpose is for computing
 * shapelet qualities.
 *
 * Fix over the previous version: the old code linearly scanned the map's entire
 * key set for every instance (O(n * k), plus a fragile boxed-Double == comparison
 * against a primitive); a direct map lookup is equivalent and O(n log k).
 *
 * @param data the input data set that the class distribution is to be derived from
 * @return a TreeMap&lt;Double, Integer&gt; in the form of &lt;Class Value, Frequency&gt;
 */
public static TreeMap<Double, Integer> getClassDistributions(Instances data) {
    TreeMap<Double, Integer> classDistribution = new TreeMap<Double, Integer>();
    for (int i = 0; i < data.numInstances(); i++) {
        double classValue = data.instance(i).classValue();
        Integer count = classDistribution.get(classValue);
        // First occurrence starts at 1; otherwise increment the stored frequency.
        classDistribution.put(classValue, count == null ? 1 : count + 1);
    }
    return classDistribution;
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
/**
 * Runs 10-fold cross-validation of the given classifier on the dataset's ARFF model
 * file, prints the evaluation summary/confusion matrix, and writes per-instance
 * predicted labels (CSV), the predicted ARFF, and a meta-information file.
 *
 * Instances are tagged with an ID attribute (first attribute) so shuffled
 * predictions can be mapped back to their original row; the ID is stripped during
 * training/prediction via a Remove filter inside a FilteredClassifier.
 *
 * @param wekaClassifier which classifier to evaluate
 * @param dataset the dataset whose ARFF file under MODELS_DIR is used
 * @throws Exception on any Weka or I/O failure
 */
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator (time-seeded: folds differ between runs)
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances so predictions can be traced back to source rows
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter (strips the ID attribute inside the FilteredClassifier)
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier so each fold trains from scratch
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate (the FilteredClassifier strips the ID before predicting)
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        // NOTE(review): AddClassification is given the inner classifier and
        // Filter.useFilter(train, filter) retrains it on the UNfiltered train set
        // (ID attribute included) — presumably intentional for prediction output,
        // but verify it matches the evaluation above.
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores: index each prediction by its (1-based) ID attribute
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        // Predicted label is the second-to-last attribute (last is the error flag)
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}
From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); Instances data = DataSource.read( MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"); data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random);//from w w w . j ava 2s .c o m // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); 
filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }