Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



public Instance instance(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:DiversifyQuery.DivTopK.java

/**
 * Transforms the data set into the shapelet feature space: one attribute per
 * shapelet (the subsequence distance from the series to that shapelet) plus
 * the original class value as the final attribute.
 *
 * @param data the input time-series data set (class attribute must be set)
 * @return the transformed Instances
 * @throws Exception if no shapelets are available or the class index is unset
 */
public Instances transformData(Instances data) throws Exception {
    ArrayList<LegacyShapelet> shapelets = new ArrayList<>();
    // BUG FIX: the original condition was "i <= 1", which is false at i = 5,
    // so the loop body never ran and no shapelets were ever collected.
    // ">= 1" iterates i = 5..1 as the descending traversal intends.
    for (int i = 5; i >= 1; i--) {
        // presumably DResultSet.get(i).result holds the top-i diversified
        // shapelets; only complete result sets (size == i) are used
        // — TODO confirm against DResultSet's construction
        if (DResultSet.get(i).result.size() == i) {
            shapelets.addAll(DResultSet.get(i).result);
        }
    }
    if (shapelets.size() < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets");
    }

    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    Instances output = determineOutputFormat(data, shapelets);

    // For each series, compute the distance to every shapelet and append the
    // class value as the final attribute.
    for (int i = 0; i < data.numInstances(); i++) {
        Instance toAdd = new Instance(shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : shapelets) {
            double dist = subsequenceDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}

From source file:DiversifyQuery.DivTopK.java

/**
 * Reads serialised shapelets from a text file and reconstructs their content
 * by extracting the corresponding subsequence from the training data.
 *
 * Each record is: seriesId, startPos, length (ints) followed by three doubles
 * (matching the LegacyShapelet(int, int, int, double, double, double) ctor).
 *
 * @param fileName path of the shapelet result file to read
 * @param data     the data set the shapelet subsequences are drawn from
 * @return the reconstructed shapelets; empty on read failure
 */
public static ArrayList<LegacyShapelet> readShapelets(String fileName, Instances data) {
    ArrayList<LegacyShapelet> shapeletsList = new ArrayList<>();
    // BUG FIX: the original ignored the fileName parameter and always read
    // the hard-coded "shapeletsResult.txt"; it also leaked the Scanner.
    try (Scanner sc = new Scanner(new File(fileName))) {
        while (sc.hasNext()) {
            LegacyShapelet shapelet = new LegacyShapelet(sc.nextInt(), sc.nextInt(), sc.nextInt(),
                    sc.nextDouble(), sc.nextDouble(), sc.nextDouble());
            // rebuild the raw subsequence values from the source series
            double[] contentValue = new double[shapelet.length];
            for (int i = 0; i < shapelet.length; i++) {
                contentValue[i] = data.instance(shapelet.seriesId).value(shapelet.startPos + i);
            }
            shapelet.content = contentValue;
            shapeletsList.add(shapelet);
        }
    } catch (Exception e) {
        // best-effort: report and return whatever was read so far
        System.out.println("Failed to read shapelets from file: " + fileName);
        e.printStackTrace();
    }
    return shapeletsList;
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

@Override
public Instances process(Instances data) throws Exception {
    if (this.numShapelets < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets");
    }/*from   w  w w.  j  ava2s .c  o m*/

    int maxPossibleLength = data.instance(0).numAttributes() - 1;
    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    if (this.minShapeletLength < 1 || this.maxShapeletLength < 1
            || this.maxShapeletLength < this.minShapeletLength || this.maxShapeletLength > maxPossibleLength) {
        throw new Exception("Shapelet length parameters initialised incorrectly");
    }

    //Sort data in round robin order
    dataSourceIDs = new int[data.numInstances()];

    for (int i = 0; i < data.numInstances(); i++) {
        dataSourceIDs[i] = i;
    }
    //        data = roundRobinData(data, dataSourceIDs);

    if (this.shapeletsTrained == false) { // shapelets discovery has not yet been caried out, so do so
        this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength,
                this.maxShapeletLength); // get k shapelets ATTENTION
        this.shapeletsTrained = true;
        if (!supressOutput) {
            System.out.println(shapelets.size() + " Shapelets have been generated");
        }
    }

    Instances output = determineOutputFormat(data);

    // for each data, get distance to each shapelet and create new instance
    for (int i = 0; i < data.numInstances(); i++) { // for each data
        Instance toAdd = new Instance(this.shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : this.shapelets) {
            double dist = subseqDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(this.shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

/**
 * Selects up to top_k SAX words by score (best first) and converts the first
 * recorded occurrence of each word back into a z-normalised shapelet
 * candidate, assessed via checkCandidate.
 *
 * @param subsequenceLength length of the candidate subsequences
 * @param top_k             number of top-scoring SAX words to expand
 * @param data              the data set the subsequences are drawn from
 * @param numClass          number of classes (kept for signature
 *                          compatibility; not used in this method)
 * @return the assessed shapelet candidates
 */
public ArrayList<LegacyShapelet> findBestTopKSAX(int subsequenceLength, int top_k, Instances data,
        int numClass) {
    // class distribution is needed to score each candidate's quality
    TreeMap<Double, Integer> classDistributions = getClassDistributions(data);

    double[] candidate = new double[subsequenceLength];
    ArrayList<LegacyShapelet> shapelets = new ArrayList<LegacyShapelet>();
    if (top_k > 0) {
        // sort SAX words by score, descending
        Collections.sort(scoreList, new Comparator<Map.Entry<Integer, Double>>() {
            @Override
            public int compare(Map.Entry<Integer, Double> a, Map.Entry<Integer, Double> b) {
                return b.getValue().compareTo(a.getValue());
            }
        });
    }
    for (int k = 0; k < Math.min(top_k, scoreList.size()); k++) {
        int word = scoreList.get(k).getKey();
        USAXElmentType usax = USAXMap.get(word);
        // expand only the first recorded (series, position) occurrence per word
        for (int kk = 0; kk < Math.min(usax.SAXIdArrayList.size(), 1); kk++) {
            int qObject = usax.SAXIdArrayList.get(kk).x;
            int qPosition = usax.SAXIdArrayList.get(kk).y;

            for (int i = 0; i < subsequenceLength; i++) {
                candidate[i] = data.instance(qObject).value(qPosition + i);
            }
            candidate = zNorm(candidate, false);
            LegacyShapelet candidateShapelet = checkCandidate(candidate, data, qObject, qPosition,
                    classDistributions, null);
            shapelets.add(candidateShapelet);
        }
    }
    return shapelets;
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

// Builds the SAX word table (USAXMap) for every subsequence of the given
// length in every series, using an incremental sliding-window update so each
// window's mean/std and per-segment sums are computed in O(1) from the
// previous window.
protected void createSAXList(int subsequenceLength, int saxLength, int w, Instances data) {

    // Recompute w (PAA segment width) and saxLength so that saxLength segments
    // of width w cover the subsequence; the incoming values are only initial
    // suggestions.
    w = (int) Math.ceil((double) subsequenceLength / saxLength);
    saxLength = (int) Math.ceil((double) subsequenceLength / w);

    double ex, ex2, mean, std;
    double[] sumSegment = new double[saxLength]; // running sum of points in each PAA segment
    int[] elementSegment = new int[saxLength];   // number of points contributing to each segment
    int j, jSt, k, slot, objectId;
    double dataPoint;
    int word, previousWord;
    for (k = 0; k < saxLength; k++) {
        elementSegment[k] = w;
    }
    // last segment may be narrower when w does not divide subsequenceLength evenly
    elementSegment[saxLength - 1] = subsequenceLength - (saxLength - 1) * w;

    for (objectId = 0; objectId < data.numInstances(); objectId++) {
        ex = ex2 = 0;      // running sum and sum-of-squares over the current window
        previousWord = -1; // numerosity reduction: skip consecutive identical words

        for (k = 0; k < saxLength; k++) {
            sumSegment[k] = 0;
        }
        double[] timeSeriesObject = data.instance(objectId).toDoubleArray();

        // case 1: fill the initial window of length subsequenceLength
        // (length - 1 excludes the trailing attribute — presumably the class
        // value; TODO confirm class is the last attribute)
        for (j = 0; (j < timeSeriesObject.length - 1) && (j < subsequenceLength); j++) {
            dataPoint = timeSeriesObject[j];
            ex += dataPoint;
            ex2 += dataPoint * dataPoint;
            slot = (int) Math.floor(j / w); // PAA segment index for position j
            sumSegment[slot] += dataPoint;
        }
        // case 2: slide the window one point at a time, updating incrementally
        for (j = j; j <= timeSeriesObject.length - 1; j++) {
            jSt = j - subsequenceLength; // start index of the current window
            mean = ex / subsequenceLength;
            std = Math.sqrt(ex2 / subsequenceLength - mean * mean);

            // create the SAX word from the current window's segment sums
            word = createSAXWord(sumSegment, elementSegment, mean, std, saxLength);

            if (word != previousWord) {
                previousWord = word;
                if (!(USAXMap.containsKey(word))) {
                    USAXMap.put(word, null);
                    USAXElmentType usax = new USAXElmentType();
                    usax.objectHashSet.add(objectId);
                    usax.SAXIdArrayList.add(new Point(objectId, jSt));
                    USAXMap.put(word, usax);
                } else {
                    // word already seen: record this series and start position
                    USAXMap.get(word).objectHashSet.add(objectId);
                    USAXMap.get(word).SAXIdArrayList.add(new Point(objectId, jSt));
                }
            }
            // prepare the next window: drop the leaving point, shift each
            // segment sum left by one point, and add the entering point
            if (j < timeSeriesObject.length - 1) {
                ex -= timeSeriesObject[jSt];
                ex2 -= timeSeriesObject[jSt] * timeSeriesObject[jSt];

                for (k = 0; k < saxLength - 1; k++) {
                    sumSegment[k] -= timeSeriesObject[jSt + k * w];
                    sumSegment[k] += timeSeriesObject[jSt + (k + 1) * w];
                }
                // last (possibly narrower) segment is clamped to the window end
                sumSegment[k] -= timeSeriesObject[jSt + k * w];
                sumSegment[k] += timeSeriesObject[jSt + Math.min((k + 1) * w, subsequenceLength)];

                dataPoint = timeSeriesObject[j];
                ex += dataPoint;
                ex2 += dataPoint * dataPoint;
            }
        }

    }

}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

/**
 * Scores a SAX word by how well its per-class match counts separate the
 * classes: for each series containing the word, the word's occurrence count
 * is credited to that series' class (cIn) and the shortfall from R to the
 * complement (cOut), then the counts are folded into a single score.
 *
 * @param usax     the SAX word's occurrence record
 * @param R        number of random projection rounds
 * @param numClass number of classes in the data set
 * @param data     the data set (used to look up each series' class value)
 * @return the separation score for this word
 */
public double calcScore(USAXElmentType usax, int R, int numClass, Instances data) {
    ArrayList<Double> cIn = new ArrayList<>();
    ArrayList<Double> cOut = new ArrayList<>();

    // one in/out accumulator per class, all starting at zero
    for (int c = 0; c < numClass; c++) {
        cIn.add(0.0);
        cOut.add(0.0);
    }

    // entry key: series id; entry value: match count for that series
    for (Object o : usax.getObjectCountHashMap().entrySet()) {
        Map.Entry entry = (Map.Entry) o;
        int cid = (int) data.instance((int) entry.getKey()).classValue();
        int count = (int) entry.getValue();
        cIn.set(cid, cIn.get(cid) + count);
        cOut.set(cid, cOut.get(cid) + (R - count));
    }
    return calScoreFromObjectCount(cIn, cOut, numClass);
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

/**
 * Checks a candidate shapelet against the whole data set. Builds an orderline
 * of (subsequence distance, class value) pairs over all series, optionally
 * abandoning early if the supplied quality bound proves the candidate cannot
 * beat the current best, and returns a fully-assessed shapelet.
 *
 * @param candidate the raw values of the candidate subsequence
 * @param data the entire data set to compare the candidate to
 * @param seriesId series id from the dataset that the candidate came from
 * @param startPos start position in the series where the candidate came from
 * @param classDistribution a TreeMap&lt;Double, Integer&gt; of
 * &lt;class value, frequency&gt; describing the dataset composition
 * @param qualityBound optional pruning bound; may be null to disable pruning
 * @return the assessed shapelet, or null if the candidate was pruned
 */
protected LegacyShapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos,
        TreeMap classDistribution, QualityBound.ShapeletQualityBound qualityBound) {

    // create orderline by looping through data set and calculating the subsequence
    // distance from candidate to all data, inserting in order.
    ArrayList<OrderLineObj> orderline = new ArrayList<OrderLineObj>();

    boolean pruned = false;

    for (int i = 0; i < data.numInstances(); i++) {
        // Check whether the bound already proves this candidate can be pruned
        if (qualityBound != null) {
            if (qualityBound.pruneCandidate()) {
                pruned = true;
                break;
            }
        }

        // distance to the candidate's own series is defined as zero
        double distance = 0.0;
        if (i != seriesId) {
            distance = subseqDistance(candidate, data.instance(i));
        }

        double classVal = data.instance(i).classValue();
        // without early abandon, it is faster to just add and sort at the end
        orderline.add(new OrderLineObj(distance, classVal));

        // Update qualityBound - presumably each bounding method for different
        // quality measures has its own update procedure.
        if (qualityBound != null) {
            qualityBound.updateOrderLine(orderline.get(orderline.size() - 1));
        }
    }

    // note: early abandon entropy pruning would appear here, but has been omitted
    // in favour of a clear multi-class information gain calculation. Could be added
    // in this method in the future for speed up, but distance early abandon is more
    // important.
    // If the shapelet was pruned it is excluded from further processing.
    if (pruned) {
        return null;
    } else {
        // create a shapelet object to store all necessary info, i.e.
        LegacyShapelet shapelet = new LegacyShapelet(candidate, seriesId, startPos, this.qualityMeasure);
        shapelet.calculateQuality(orderline, classDistribution);
        shapelet.calcInfoGainAndThreshold(orderline, classDistribution);
        return shapelet;
    }
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

/**
 * Calculates the class distribution of a dataset. Main purpose is for
 * computing shapelet qualities.
 *
 * @param data the input data set that the class distribution is to be
 * derived from
 * @return a TreeMap&lt;Double, Integer&gt; in the form of
 * &lt;class value, frequency&gt;
 */
public static TreeMap<Double, Integer> getClassDistributions(Instances data) {
    TreeMap<Double, Integer> classDistribution = new TreeMap<Double, Integer>();
    for (int i = 0; i < data.numInstances(); i++) {
        double classValue = data.instance(i).classValue();
        // Direct O(log k) lookup replaces the original O(k) scan over keySet()
        // for every instance; the resulting map is identical.
        Integer count = classDistribution.get(classValue);
        classDistribution.put(classValue, count == null ? 1 : count + 1);
    }
    return classDistribution;
}

From source file:dkpro.similarity.experiments.rte.util.Evaluator.java

License:Open Source License

// Runs 10-fold cross-validation of the given Weka classifier on the dataset's
// ARFF model file, writes per-instance predicted labels to a CSV (ordered by
// the instance IDs added up front), the predicted ARFF, and a meta summary.
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator (time-seeded, so folds differ per run)
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances so predictions can be mapped back to the
    // original row order after randomization
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter to strip the ID attribute during training
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        //          Filter logFilter = new LogFilter();
        //           logFilter.setInputFormat(train);
        //           train = Filter.useFilter(train, logFilter);        
        //           logFilter.setInputFormat(test);
        //           test = Filter.useFilter(test, logFilter);

        // Copy the classifier so each fold trains a fresh model
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier (removes the ID attribute before
        // the wrapped classifier sees the data)
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        // NOTE(review): AddClassification wraps the UNfiltered classifier copy
        // and retrains it below on the raw train split (ID attribute included)
        // — presumably intentional to keep the ID column in the output; confirm.
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores: index by (ID - 1) to restore original row order
    String[] scores = new String[predictedData.numInstances()];

    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;

        // the predicted class label sits second-to-last (error flag is last)
        int valueIdx = predictedData.numAttributes() - 2;

        String value = predInst.stringValue(predInst.attribute(valueIdx));

        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString()
            + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString()
            + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}

From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java

License:Open Source License

/**
 * Runs 10-fold cross-validated linear regression for each dataset, writing
 * per-instance scores (clamped to [0;5], ordered by the instance IDs added
 * up front) to a CSV file per dataset.
 *
 * @param mode     experiment mode; selects the model/output subdirectory
 * @param datasets datasets to evaluate
 * @throws Exception on any I/O or Weka failure
 */
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator (time-seeded, so folds differ per run)
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Add IDs to the instances so predictions can be mapped back to the
        // original row order after randomization
        AddID.main(new String[] { "-i",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString()
                        + "-plusIDs.arff" });
        Instances data = DataSource.read(
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter to strip the ID attribute during training
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);

        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier so each fold trains a fresh model
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier (removes the ID attribute
            // before the wrapped classifier sees the data)
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate
            // BUG FIX: the original evaluated the bare `classifier`, which was
            // trained on ID-stripped data but handed the unfiltered test set;
            // the sibling runClassifierCV correctly evaluates the
            // FilteredClassifier, which applies the Remove filter itself.
            eval.evaluateModel(filteredClassifier, test);

            // Add predictions
            // NOTE(review): AddClassification wraps the unfiltered classifier
            // copy and retrains it on the raw train split — mirrors
            // runClassifierCV; presumably intentional to keep the ID column.
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier

            Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores: index by (ID - 1) to restore original order
        double[] scores = new double[predictedData.numInstances()];

        for (Instance predInst : predictedData) {
            int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;

            // the predicted value sits second-to-last (error flag is last)
            int valueIdx = predictedData.numAttributes() - 2;

            double value = predInst.value(predInst.attribute(valueIdx));

            scores[id] = value;

            // Limit to interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (Double score : scores) {
            sb.append(score.toString() + LF);
        }

        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}