Example usage for weka.core Instances add

Introduction

This page collects real-world usage examples for the weka.core Instances add method.

Prototype

@Override
public boolean add(Instance instance) 

Document

Adds one instance to the end of the set.
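
Before the real-world listings below, here is a minimal, self-contained sketch of the call. It assumes the Weka 3.7+ API, in which DenseInstance replaces the concrete Instance class that some of the older examples below still use; the dataset and attribute names are illustrative.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class InstancesAddExample {
    public static void main(String[] args) {
        // Declare two numeric attributes and create an empty dataset
        ArrayList<Attribute> atts = new ArrayList<>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("y"));
        Instances data = new Instances("example", atts, 0);

        // Build an instance with weight 1.0 and the given attribute values
        Instance inst = new DenseInstance(1.0, new double[] { 1.5, 2.5 });

        // add appends a (shallow) copy of the instance to the end of the set
        data.add(inst);

        System.out.println(data.numInstances()); // prints 1
    }
}

Because add stores a copy, later modifications to inst do not affect the dataset. Several of the examples below call setDataset(...) on the instance before adding it, which associates the instance with the dataset's header so that its attribute values resolve correctly.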

Usage

From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java

License:Open Source License

@Override
public Model buildModel(List<ClassifyUnit> cus, FeatureUnitConfiguration fuc, AbstractFeatureQuantifier fq,
        File trainingDataFile) {

    Instances trainingSet = initTrainingSet(cus);
    for (ClassifyUnit classifyUnit : cus) {
        trainingSet.add(instance(((ZoneClassifyUnit) classifyUnit), trainingSet));
    }

    //      // remember that the training has not yet been completed ...
    //      classifierBuilt = false;
    WekaModel model = new WekaModel();

    model.setTrainingData(trainingSet);
    model.setClassifierName(this.getClass().getSimpleName());
    model.setFQName(fq.getClass().getSimpleName());
    model.setDataFile(trainingDataFile);
    model.setFuc(fuc);
    model.setFUOrder(fq.getFeatureUnitOrder());

    return model;

}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }

    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);

    // Declare the feature vector
    fvWekaAttributes.add(ClassAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(ClassAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
    }
    final double[] sample = testSet.get(0); // note: throws if testSet is empty
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(ClassAttribute);

    // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file:detplagiasi.TextDirectoryToArff.java

License:Open Source License

public Instances createDataset(String directoryPath) throws Exception {

    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("contents", (FastVector) null));

    /*
    ArrayList atts = new ArrayList(2);
    atts.addElement(new Attribute("filename", (ArrayList) null));
    atts.addElement(new Attribute("contents", (ArrayList) null));
     */
    Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);
    File dir = new File(directoryPath);
    String[] files = dir.list();
    // create a file to record the file name of each processed instance
    //FileWriter fstream = new FileWriter(directoryPath+"\\cluster detail.txt");
    BufferedWriter out = new BufferedWriter(new FileWriter(directoryPath + "\\cluster detail.txt"));

    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt")) {
            out.write("file ke " + (i + 1) + ": " + files[i]);
            System.out.println("processed files:" + files[i]);
            fileName[i] = files[i];
            out.write("file ke " + (i + 1) + ": " + files[i]);
            try {
                double[] newInst = new double[2];
                newInst[0] = (double) data.attribute(0).addStringValue(files[i]);
                File txt = new File(directoryPath + File.separator + files[i]);

                System.out.println("TDTARFF: " + txt.getCanonicalPath());

                InputStreamReader is;
                is = new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }
                is.close();
                newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());
                try {
                    out.write("file ke " + (i + 1) + ": " + files[i]);
                    System.out.println("success");
                } catch (Exception d) {
                    System.err.println(d.getLocalizedMessage());
                }
                // record the instance's file name in the detail file
                data.add(new Instance(1.0, newInst)); // pre-3.7 Weka API, where Instance is a concrete class
                //data.renameAttributeValue(data.attribute("att_name_in_data2"),"att_value_in_data2","att_value_in_data1");
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    out.close();
    return data;
}

From source file:development.CrossValidateShapelets.java

public static Instances randomise(Instances train, int[] pos) {
    //Generate a random permutation into pos
    Random r = new Random();
    for (int i = 0; i < pos.length; i++)
        pos[i] = i;
    for (int i = 0; i < pos.length; i++) {
        int p1 = r.nextInt(pos.length);
        int p2 = r.nextInt(pos.length);
        int temp = pos[p1];
        pos[p1] = pos[p2];
        pos[p2] = temp;
    }
    Instances newD = new Instances(train, 0);
    for (int i = 0; i < pos.length; i++)
        newD.add(train.instance(pos[i]));
    return newD;
}

From source file:development.SpectralTransformComparison.java

public void run() {
    //   Set up the experiment parameters
    int nosCases = 400;
    int[] nosCasesPerClass = { nosCases / 2, nosCases / 2 };
    int runs = 50;
    int minParas = 2;
    int maxParas = 10;
    ArrayList<String> names = new ArrayList<>();
    Random rand = new Random();
    c = ACFDomainClassification.setSingleClassifiers(names);

    int length = m;
    try {
        int nosTrans = 3;
        Instances[] train = new Instances[nosTrans];
        Instances[] test = new Instances[nosTrans];
        double[][] sum = new double[train.length][c.length];
        double[][] sumSq = new double[train.length][c.length];
        PowerSpectrum ps = new PowerSpectrum();
        PowerCepstrum pc = new PowerCepstrum();
        pc.useFFT();
        FFT fft = new FFT();

        OutFile of = new OutFile(path + "mean_" + m + ".csv");
        OutFile of2 = new OutFile(path + "sd_" + m + ".csv");
        System.out.println(" Running length =" + m);
        of.writeLine("classifier,PS,PC,FFT");
        of2.writeLine("classifier,PS,PC,FFT");

        for (int i = 0; i < runs; i++) {
            //Generate data AND SET NOISE LEVEL
            c = ACFDomainClassification.setSingleClassifiers(names);
            if (i % 10 == 0)
                System.out.println(" m =" + m + " performing run =" + i);
            train = new Instances[nosTrans];
            test = new Instances[nosTrans];
            //Change to simulate sin waves.
            Instances rawTrain = SimulatePowerSpectrum.generateFFTDataSet(minParas, maxParas, length,
                    nosCasesPerClass, true);
            rawTrain.randomize(rand);
            Instances rawTest = new Instances(rawTrain, 0);
            for (int k = 0; k < nosCases / 2; k++) {
                Instance r = rawTrain.remove(0);
                rawTest.add(r);
            }
            //Generate transforms                        
            train[0] = ps.process(rawTrain);
            train[1] = pc.process(rawTrain);
            train[2] = fft.process(rawTrain);

            test[0] = ps.process(rawTest);
            test[1] = pc.process(rawTest);
            test[2] = fft.process(rawTest);
            //Measure classification accuracy
            for (int j = 0; j < test.length; j++) {
                for (int k = 0; k < c.length; k++) {
                    double a = ClassifierTools.singleTrainTestSplitAccuracy(c[k], train[j], test[j]);
                    sum[j][k] += a;
                    sumSq[j][k] += a * a;
                }
            }
        }
        DecimalFormat df = new DecimalFormat("###.###");
        System.out.print("\n m=" + length);
        for (int j = 0; j < c.length; j++) {
            of.writeString(names.get(j) + ",");
            of2.writeString(names.get(j) + ",");
            for (int i = 0; i < test.length; i++) {
                sum[i][j] /= runs;
                sumSq[i][j] = sumSq[i][j] / runs - sum[i][j] * sum[i][j];
                System.out.print("," + df.format(sum[i][j]) + " (" + df.format(sumSq[i][j]) + ")");
                of.writeString(df.format(sum[i][j]) + ",");
                of2.writeString(df.format(sumSq[i][j]) + ",");
            }
            of.writeString("\n");
            of2.writeString("\n");
        }
    } catch (Exception e) {
        System.out.println(" Error =" + e);
        e.printStackTrace();
        System.exit(0);
    }

}

From source file:DiversifyQuery.DivTopK.java

public Instances transformData(Instances data) throws Exception {
    ArrayList<LegacyShapelet> shapelets = new ArrayList<>();
    for (int i = 5; i >= 1; i--) {
        if (DResultSet.get(i).result.size() == i) {
            shapelets.addAll(DResultSet.get(i).result);
        }
    }
    if (shapelets.size() < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select a value of k greater than or equal to 1 (Usage: setNumberOfShapelets)");
    }

    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    Instances output = determineOutputFormat(data, shapelets);

    // for each data, get distance to each shapelet and create new instance
    for (int i = 0; i < data.numInstances(); i++) { // for each data
        Instance toAdd = new Instance(shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : shapelets) {
            double dist = subsequenceDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}

From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java

@Override
public Instances process(Instances data) throws Exception {
    if (this.numShapelets < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select a value of k greater than or equal to 1 (Usage: setNumberOfShapelets)");
    }

    int maxPossibleLength = data.instance(0).numAttributes() - 1;
    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    if (this.minShapeletLength < 1 || this.maxShapeletLength < 1
            || this.maxShapeletLength < this.minShapeletLength || this.maxShapeletLength > maxPossibleLength) {
        throw new Exception("Shapelet length parameters initialised incorrectly");
    }

    //Sort data in round robin order
    dataSourceIDs = new int[data.numInstances()];

    for (int i = 0; i < data.numInstances(); i++) {
        dataSourceIDs[i] = i;
    }
    //        data = roundRobinData(data, dataSourceIDs);

    if (!this.shapeletsTrained) { // shapelet discovery has not yet been carried out, so do so
        this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength,
                this.maxShapeletLength); // get k shapelets ATTENTION
        this.shapeletsTrained = true;
        if (!supressOutput) {
            System.out.println(shapelets.size() + " Shapelets have been generated");
        }
    }

    Instances output = determineOutputFormat(data);

    // for each data, get distance to each shapelet and create new instance
    for (int i = 0; i < data.numInstances(); i++) { // for each data
        Instance toAdd = new Instance(this.shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : this.shapelets) {
            double dist = subseqDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(this.shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}

From source file:dkpro.similarity.experiments.rte.util.Evaluator.java

License:Open Source License

public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        //          Filter logFilter = new LogFilter();
        //           logFilter.setInputFormat(train);
        //           train = Filter.useFilter(train, logFilter);        
        //           logFilter.setInputFormat(test);
        //           test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores
    String[] scores = new String[predictedData.numInstances()];

    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;

        int valueIdx = predictedData.numAttributes() - 2;

        String value = predInst.stringValue(predInst.attribute(valueIdx));

        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString()
            + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString()
            + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}

From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java

License:Open Source License

public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Add IDs to the instances
        AddID.main(new String[] { "-i",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString()
                        + "-plusIDs.arff" });
        Instances data = DataSource.read(
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);

        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate
            eval.evaluateModel(filteredClassifier, test); // evaluate the filtered classifier so the ID attribute is removed from the test data as well

            // Add predictions
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier

            Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores
        double[] scores = new double[predictedData.numInstances()];

        for (Instance predInst : predictedData) {
            int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;

            int valueIdx = predictedData.numAttributes() - 2;

            double value = predInst.value(predInst.attribute(valueIdx));

            scores[id] = value;

            // Limit to interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (Double score : scores) {
            sb.append(score.toString() + LF);
        }

        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}