Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find example usage for the weka.core.Instances method numAttributes().

Prototype


public int numAttributes()

Document

Returns the number of attributes.
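
A minimal sketch of the most common pattern (assuming a local ARFF file; "sample.arff" is a placeholder path): numAttributes() is typically used to set the class attribute to the last column of a loaded dataset.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // load a dataset; "sample.arff" is a placeholder path
        Instances data = new Instances(new BufferedReader(new FileReader("sample.arff")));

        // numAttributes() counts all columns, including the class attribute
        System.out.println("Attributes: " + data.numAttributes());

        // common idiom: treat the last attribute as the class
        data.setClassIndex(data.numAttributes() - 1);
    }
}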

Usage

From source file:assign00.ExperimentShell.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource(file); // "file" is a field of the enclosing class
    Instances dataSet = source.getDataSet();

    //Set up data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    //determine sizes
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;

    Instances training = new Instances(dataSet, 0, trainingSize);

    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);

    Instances newTest = Filter.useFilter(test, standardizedData);
    Instances newTraining = Filter.useFilter(training, standardizedData);

    NeuralNetworkClassifier NWC = new NeuralNetworkClassifier();
    NWC.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(NWC, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java

public Integer classify(Tweet[] tweets) {
    // TEST

    // Generate two tweet examples
    Tweet exOne = new Tweet("This is good and fantastic");
    exOne.setPreprocessedText("This is good and fantastic");
    Tweet exTwo = new Tweet("Horribly, terribly bad and more");
    exTwo.setPreprocessedText("Horribly, terribly bad and more");
    Tweet exThree = new Tweet(
            "I want to update lj and read my friends list, but I'm groggy and sick and blargh.");
    exThree.setPreprocessedText(
            "I want to update lj and read my friends list, but I'm groggy and sick and blargh.");
    Tweet exFour = new Tweet("bad hate worst sick");
    exFour.setPreprocessedText("bad hate worst sick");
    tweets = new Tweet[] { exOne, exTwo, exThree, exFour };
    // TEST

    // Load model
    //        loadModel();
    // Convert Tweet to Instance type 
    // Get String Data
    // Create attributes for the Instances set
    Attribute twitter_id = new Attribute("twitter_id");
    //        Attribute body = new Attribute("body");

    FastVector classVal = new FastVector(2);
    classVal.addElement("pos");
    classVal.addElement("neg");

    Attribute class_attr = new Attribute("class_attr", classVal);

    // Add them to a list
    FastVector attrVector = new FastVector(3);
    //        attrVector.addElement(twitter_id);
    //        attrVector.addElement(new Attribute("body", (FastVector) null));
    //        attrVector.addElement(class_attr);

    // Get the number of tweets and then create predictSet
    int numTweets = tweets.length;
    Enumeration structAttrs = dataStructure.enumerateAttributes();

    //        ArrayList<Attribute> attrList = new ArrayList<Attribute>(dataStructure.numAttributes());
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }
    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    //        Instances predictSet = new Instances(dataStructure);
    predictSet.setClassIndex(2);

    // init prediction
    double prediction = -1;

    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));

    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));
    // Array to return predictions 
    //double[] tweetsClassified = new double[2][numTweets];
    //List<Integer, Double> tweetsClass = new ArrayList<Integer, Double>(numTweets);
    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();

        System.out.println("Tweet content: " + content);

        //            attrList
        Instance tweetInstance = new Instance(predictSet.numAttributes());

        tweetInstance.setDataset(predictSet);

        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();

        predictSet.add(tweetInstance);

        try {
            // Apply string filter
            StringToWordVector filter = new StringToWordVector();

            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);

            // Apply model
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);

        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return 0; // NOTE: predictions are only printed above; the method always returns 0
}

From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java

License:Apache License

public Instances getWekaInstances() {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = mAnalysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        if (counter > 0) {
            throw new RuntimeException("More than one DActivationPatternPackage found!");
        }
        counter++;
        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }

        Set<String> labelSet = getLabelSet(dActivationPatternPackage);

        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }

        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);

        instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);

        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());

            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }

            instance.setDataset(instances);

            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);
                    instance.setClassValue(label);
                    break;
                }
            }

            instances.add(instance);
        }

    }
    return instances;
}

From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java

License:Apache License

protected Instances generateInstancesHeader(DActivationPatternPackage dActivationPatternPackage) {
    DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
    FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
    for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
        Attribute attribute = new Attribute(j + "");
        fvWekaAttributes.addElement(attribute);
    }

    Set<String> labelSet = getLabelSet(dActivationPatternPackage);

    FastVector classValues = new FastVector(labelSet.size());
    for (String label : labelSet) {
        classValues.addElement(label);
    }

    Attribute classAttribute = new Attribute("Class", classValues);
    fvWekaAttributes.addElement(classAttribute);

    Instances instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
            dActivationPatternPackage.getActivationPatterns().size());
    instances.setClassIndex(instances.numAttributes() - 1);
    return instances;
}

From source file:at.tuflowgraphy.semanticapps.semdroid.utils.ArffHelper.java

License:Apache License

public Instances getWekaInstances(AnalysisChain analysisChain, String name) {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = analysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        if (counter > 0) {
            //                String resultFileName = arffFile.getName();
            //                String newName = resultFileName.split("_")[0];
            //                int index = resultFileName.indexOf("_");
            //                newName += "-MISSING-" + counter + "-"
            //                        + resultFileName.substring(index);
            //                arffFileToWriteTo = new File(arffFile.getParentFile(), newName);
            System.err.println("ERROR: Multiple activation pattern packages found! Should not happen...");
        }
        counter++;
        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }

        Set<String> labelSet = getLabelSet(dActivationPatternPackage);

        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }

        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);

        instances = new Instances(name, fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);

        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());

            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }

            instance.setDataset(instances);

            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);

                    // TODO: dynamically add new labels to instances so that getLabelSet for-loop is not required

                    // System.out.println(label);
                    // if(!labelSet.contains(label)) {
                    // labelSet.add(label);
                    // // classValues.addElement(label);
                    // classAttribute.addStringValue(label);
                    // instances.attribute(instances.classIndex()).addValue(label);
                    // System.out.println("ADDED " + label);
                    // }
                    instance.setClassValue(label);
                    // TODO: only first class value used
                    break;
                }
            }
            instances.add(instance);
        }

    }
    return instances;
}

From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java

License:Open Source License

/**
 * the target function in fitness form
 * 
 * @return   classification accuracy
 */
public double ensembleClassify() {
    double fitnessValue = 0.0;
    double classifiersScore = 0.0;

    /* load in the modified data set */
    try {
        Instances reducedSet = new Instances(new BufferedReader(new FileReader("reduced.arff")));
        reducedSet.setClassIndex(reducedSet.numAttributes() - 1);

        // calculating the evaluation values using each classifier respectively
        if (verbose == true) {
            System.out.println();
            System.out.println(" |----------J4.8-----------|");
            System.out.println(" |            |            |");
        }
        J48 tree = new J48();
        classifiersScore = classify(tree, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |-----3NearestNeighbor----|");
            System.out.println(" |            |            |");
        }
        IBk nn3 = new IBk(3);
        classifiersScore = classify(nn3, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |--------NaiveBayes-------|");
            System.out.println(" |            |            |");
        }
        NaiveBayes nb = new NaiveBayes();
        classifiersScore = classify(nb, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |-------RandomForest------|");
            System.out.println(" |            |            |");
        }
        RandomForest rf5 = new RandomForest();
        rf5.setNumTrees(5);
        classifiersScore = classify(rf5, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |---------Logistic--------|");
            System.out.println(" |            |            |");
        }
        Logistic log = new Logistic();
        classifiersScore = classify(log, reducedSet, internalTest);
        fitnessValue += classifiersScore;

    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    fitnessValue /= 5; // average over the five classifiers

    if (verbose == true) {
        System.out.println();
        System.out.println("Fitness: " + fitnessValue);
        System.out.println("---------------------------------------------------");
    }

    return fitnessValue;
}

From source file:au.edu.usyd.it.yangpy.snp.GEsnpxPara.java

License:Open Source License

/**
 * this function performs genetic operations
 *
 * @param saveFlag   append/write to the output file
 */
public void performGeneticOperation(int saveFlag) throws Exception {
    // initialize processing components
    // loading the raw data
    Instances rawData = new Instances(new BufferedReader(new FileReader(file)));
    rawData.setClassIndex(rawData.numAttributes() - 1);

    ParallelGenetic genetic = new ParallelGenetic(rawData, chroLen, popSize, terGener, mode, balance, diversity,
            numThread);
    genetic.initializeParameters();
    genetic.initializeChromosomes();
    genetic.evaluate();

    for (int i = 1; i < genetic.getTerimateGeneration(); i++) {
        genetic.selectElitism();
        genetic.selectUsingTournament();
        genetic.crossover();
        genetic.mutate();
        genetic.generateNewGeneration();
        genetic.evaluate();
    }

    if (saveFlag == 0)
        genetic.saveBestChro(false);
    else
        genetic.saveBestChro(true);
}

From source file:aw_cluster.myKMeans.java

protected double[] moveCentroid(Instances members) {
    double[] vals = new double[members.numAttributes()];

    for (int j = 0; j < members.numAttributes(); j++) {
        vals[j] = members.meanOrMode(j);
    }
    centroid.add(new Instance(1.0, vals));

    return vals;
}

From source file:binarizer.LayoutAnalysis.java

public double crossValidation(String arffFile) throws Exception {
    DataSource source = new DataSource(arffFile);
    Instances trainingData = source.getDataSet();
    if (trainingData.classIndex() == -1)
        trainingData.setClassIndex(trainingData.numAttributes() - 1);
    NaiveBayes nb = new NaiveBayes();
    nb.setUseSupervisedDiscretization(true);
    Evaluation evaluation = new Evaluation(trainingData);
    evaluation.crossValidateModel(nb, trainingData, 10, new Random(1));
    System.out.println(evaluation.toSummaryString());
    return evaluation.errorRate();
}

From source file:binarytreesom.clustering.BinaryTreeSOMClustering.java

/**
 * Initialize the tree configuration. This implementation considers a complete binary tree of depth h. 
 */
private void initialize() throws IOException {
    // the number of nodes N depends on the depth h: N = 2^h (i.e. h = log2 N)
    Instances instances = readArff(getFilenameARFF());
    instances.setClassIndex(-1); // no class attribute when clustering

    numberOfInstances = instances.numInstances();
    dimensionality = instances.numAttributes();
    data = new double[getNumberOfInstances()][getDimensionality()];
    weight = new double[getNumberOfNeurons()][getDimensionality()];
    //randomly select instances and assign to weight.

    for (int k = 0; k < getNumberOfNeurons(); k++) {
        weight[k] = instances.instance(r.nextInt(getNumberOfInstances())).toDoubleArray(); //hard copy of the double array
    }

    for (int k = 0; k < getNumberOfInstances(); k++) {
        data[k] = instances.instance(k).toDoubleArray(); //hard copy of the double array
    }
}