Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

In this page you can find the example usage for weka.core Instances Instances.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

/**
 * Generates a Gaussian data set with K clusters and m dimensions
 * //from   ww  w.j av  a  2 s  . c o  m
 * @param centers
 *            K x m matrix
 * @param sigmas
 *            K x m matrix
 * @param pointsPerCluster
 *            number of points per cluster
 * @param seed
 *            for the RNG
 * @param randomize
 *            should the order of the instances be randomized?
 * @param supervised
 *            should class label be present? if true, the class is the m+1
 *            attribute
 * 
 * @return
 */
public static Instances genGaussianDataset(double[][] centers, double[][] sigmas, int pointsPerCluster,
        long seed, boolean randomize, boolean supervised) {
    Random r = new Random(seed);

    int K = centers.length; // number of clusters
    int m = centers[0].length; // number of dimensions

    FastVector atts = new FastVector(m);
    for (int i = 0; i < m; i++)
        atts.addElement(new Attribute("at" + i));

    if (supervised) {
        FastVector cls = new FastVector(K);
        for (int i = 0; i < K; i++)
            cls.addElement("Gauss-" + i);
        atts.addElement(new Attribute("Class", cls));
    }

    Instances data;
    if (supervised)
        data = new Instances(K + "-Gaussians-supervised", atts, K * pointsPerCluster);
    else
        data = new Instances(K + "-Gaussians", atts, K * pointsPerCluster);

    if (supervised)
        data.setClassIndex(m);

    Instance ith;

    for (int i = 0; i < K; i++) {
        for (int j = 0; j < pointsPerCluster; j++) {
            if (!supervised)
                ith = new DenseInstance(m);
            else
                ith = new DenseInstance(m + 1);
            ith.setDataset(data);
            for (int k = 0; k < m; k++)
                ith.setValue(k, centers[i][k] + (r.nextGaussian() * sigmas[i][k]));
            if (supervised)
                ith.setValue(m, "Gauss-" + i);
            data.add(ith);
        }
    }

    // run randomization filter if desired
    if (randomize)
        data.randomize(r);

    return data;
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(Object objData) {

    try {/*  w ww  .ja  v  a 2 s . c o  m*/
        Instances dataInstance;
        DataBag values = (DataBag) objData;
        int numAttributes = values.iterator().next().size(); // N_Features + 1 Class
        int bagSize = 0; // To set the number of train samples

        // To find the number of samples (instances in a bag)
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            it.next();
            bagSize = bagSize + 1;
        }

        // Code for find the different classes names in the input 
        String[] inputClass = new String[bagSize]; // String vector with the samples class's names
        int index = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple tuple = it.next();
            inputClass[index] = DataType.toString(tuple.get(numAttributes - 1));
            index = index + 1;
        }

        HashSet classSet = new HashSet(Arrays.asList(inputClass));

        String[] classValue = (String[]) classSet.toArray(new String[0]);
        // To set the classes names in the attribute for the instance

        FastVector classNames = new FastVector();
        for (int i = 0; i < classValue.length; i++)
            classNames.addElement(classValue[i]);

        // Creating the instance model N_Features + 1_ClassNames

        FastVector atts = new FastVector();
        for (int i = 0; i < numAttributes - 1; i++)
            atts.addElement(new Attribute("att" + i));
        dataInstance = new Instances("MyRelation", atts, numAttributes);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1);

        // To set the instance values for the dataInstance model created 
        Instance tmpData = new DenseInstance(numAttributes);
        index = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple tuple = it.next();
            for (int i = 0; i < numAttributes - 1; i++)
                tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(tuple.get(i)));
            //tmpData.setValue((weka.core.Attribute) atts.elementAt(numAttributes-1), DataType.toString(tuple.get(numAttributes-1)));
            dataInstance.add(tmpData);
            dataInstance.instance(index).setValue(numAttributes - 1,
                    DataType.toString(tuple.get(numAttributes - 1)));
            index = index + 1;
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);

        return dataInstance;
    } catch (Exception e) {
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(BufferedReader buff) {

    try {/*from   ww w .  ja v a 2 s.co m*/
        Instances dataInstance;
        //DataBag values = (DataBag)objData;

        int numAttributes = 0; // N_Features + 1 Class

        List<String> inputClass = new ArrayList<String>();

        List<String[]> dataset = new ArrayList<String[]>();

        // To find the number of samples (instances in a bag)
        String line;
        while ((line = buff.readLine()) != null) {
            if (!line.isEmpty()) {
                String[] data = line.split(",");
                if (numAttributes == 0)
                    numAttributes = data.length;
                inputClass.add(data[data.length - 1]);
                dataset.add(data);
            }
        }

        HashSet classSet = new HashSet(inputClass);

        String[] classValue = (String[]) classSet.toArray(new String[0]);
        // To set the classes names in the attribute for the instance

        FastVector classNames = new FastVector();
        for (int i = 0; i < classValue.length; i++)
            classNames.addElement(classValue[i]);

        // Creating the instance model N_Features + 1_ClassNames

        FastVector atts = new FastVector();
        for (int i = 0; i < numAttributes - 1; i++)
            atts.addElement(new Attribute("att" + i));
        dataInstance = new Instances("MyRelation", atts, numAttributes);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1);

        // To set the instance values for the dataInstance model created 
        Instance tmpData = new DenseInstance(numAttributes);
        int index = 0;
        for (int k = 0; k < dataset.size(); k++) {

            for (int i = 0; i < numAttributes - 1; i++)
                tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(dataset.get(k)[i]));
            //tmpData.setValue((weka.core.Attribute) atts.elementAt(numAttributes-1), DataType.toString(tuple.get(numAttributes-1)));
            dataInstance.add(tmpData);
            dataInstance.instance(index).setValue(numAttributes - 1,
                    DataType.toString(dataset.get(k)[numAttributes - 1]));
            index = index + 1;
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);

        return dataInstance;
    } catch (Exception e) {
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}

From source file:br.ufpe.cin.mpos.offload.DynamicDecisionSystem.java

License:Apache License

public synchronized boolean isRemoteAdvantage(int InputSize, Remotable.Classifier classifierRemotable) {
    boolean resp = false;
    try {/*  www  . j av  a  2 s .c o m*/
        if ((!(this.classifierModel.equals(classifierRemotable.toString()))) || this.classifier == null) {
            Log.d("classificacao", "classificador=" + classifierRemotable.toString());
            this.classifierModel = classifierRemotable.toString();
            loadClassifier(classifierRemotable);
        }
        Cursor c = dc.getData();
        int colunas = c.getColumnCount();
        Instance instance = new DenseInstance(colunas - 2);
        ArrayList<String> values = new ArrayList<String>();
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        if (c.moveToFirst()) {
            for (int i = 1; i <= colunas - 2; i++) {
                String feature = c.getColumnName(i);
                String value = c.getString(i);
                Attribute attribute;
                if (feature.equals(DatabaseManager.InputSize)) {
                    values.add("" + InputSize);
                    attribute = new Attribute(DatabaseManager.InputSize);
                } else {
                    String[] strings = populateAttributes(i);
                    ArrayList<String> attValues = new ArrayList<String>(Arrays.asList(strings));
                    attribute = new Attribute(feature, attValues);
                    if (value != null) {
                        values.add(value);
                    }
                }
                atts.add(attribute);
            }
            Instances instances = new Instances("header", atts, atts.size());
            instances.setClassIndex(instances.numAttributes() - 1);
            instance.setDataset(instances);
            for (int i = 0; i < atts.size(); i++) {
                if (i == 9) {
                    instance.setMissing(atts.get(9));
                } else if (atts.get(i).name().equals(DatabaseManager.InputSize)) {
                    instance.setValue(atts.get(i), InputSize);
                } else {
                    instance.setValue(atts.get(i), values.get(i));
                }
            }
            double value = -1;
            value = classifier.distributionForInstance(instance)[0];
            Log.d("classificacao", instance.toString() + " classifiquei com o seguinte valor" + value);
            resp = (0.7 <= value);
            if (resp) {
                Log.d("classificacao", "sim");
                Log.d("Finalizado", "classifiquei " + instance.toString() + " com sim");
            } else {
                Log.d("classificacao", "nao");
                Log.d("Finalizado", "classifiquei " + instance.toString() + " com nao");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Log.e("sqlLite", e.getMessage());
        Log.e("sqlLite", "Causa: " + e.getCause());
    }
    return resp;
}

From source file:br.unicamp.ic.recod.gpsi.gp.gpsiJGAPRoiFitnessFunction.java

@Override
protected double evaluate(IGPProgram igpp) {

    double mean_accuracy = 0.0;
    Object[] noargs = new Object[0];

    gpsiRoiBandCombiner roiBandCombinator = new gpsiRoiBandCombiner(new gpsiJGAPVoxelCombiner(super.b, igpp));
    // TODO: The ROI descriptors must combine the images first
    //roiBandCombinator.combineEntity(this.dataset.getTrainingEntities());

    gpsiMLDataset mlDataset = new gpsiMLDataset(this.descriptor);
    try {// w  w w . j av  a 2  s.  com
        mlDataset.loadWholeDataset(this.dataset, true);
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    int dimensionality = mlDataset.getDimensionality();
    int n_classes = mlDataset.getTrainingEntities().keySet().size();
    int n_entities = mlDataset.getNumberOfTrainingEntities();
    ArrayList<Byte> listOfClasses = new ArrayList<>(mlDataset.getTrainingEntities().keySet());

    Attribute[] attributes = new Attribute[dimensionality];
    FastVector fvClassVal = new FastVector(n_classes);

    int i, j;
    for (i = 0; i < dimensionality; i++)
        attributes[i] = new Attribute("f" + Integer.toString(i));
    for (i = 0; i < n_classes; i++)
        fvClassVal.addElement(Integer.toString(listOfClasses.get(i)));

    Attribute classes = new Attribute("class", fvClassVal);

    FastVector fvWekaAttributes = new FastVector(dimensionality + 1);

    for (i = 0; i < dimensionality; i++)
        fvWekaAttributes.addElement(attributes[i]);
    fvWekaAttributes.addElement(classes);

    Instances instances = new Instances("Rel", fvWekaAttributes, n_entities);
    instances.setClassIndex(dimensionality);

    Instance iExample;
    for (byte label : mlDataset.getTrainingEntities().keySet()) {
        for (double[] featureVector : mlDataset.getTrainingEntities().get(label)) {
            iExample = new Instance(dimensionality + 1);
            for (j = 0; j < dimensionality; j++)
                iExample.setValue(i, featureVector[i]);
            iExample.setValue(dimensionality, label);
            instances.add(iExample);
        }
    }

    int folds = 5;
    Random rand = new Random();
    Instances randData = new Instances(instances);
    randData.randomize(rand);

    Instances trainingSet, testingSet;
    Classifier cModel;
    Evaluation eTest;
    try {
        for (i = 0; i < folds; i++) {
            cModel = (Classifier) new SimpleLogistic();
            trainingSet = randData.trainCV(folds, i);
            testingSet = randData.testCV(folds, i);

            cModel.buildClassifier(trainingSet);

            eTest = new Evaluation(trainingSet);
            eTest.evaluateModel(cModel, testingSet);

            mean_accuracy += eTest.pctCorrect();

        }
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    mean_accuracy /= (folds * 100);

    return mean_accuracy;

}

From source file:c4.pkg5crossv.Classifier.java

public static void trainAndTest() throws FileNotFoundException, IOException, Exception {

    Instances data = DataLoad.loadData("./src/data/irysy.arff");
    data.setClassIndex(data.numAttributes() - 1);

    //Losowy podzial tablicy
    data.randomize(new Random());
    double percent = 60.0;
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);

    String[] options = Utils.splitOptions("-U -M 10");
    J48 tree = new J48();
    tree.setOptions(options);//from   ww w.  j ava2 s  .c  o  m
    tree.buildClassifier(trainData);

    Evaluation eval2 = new Evaluation(trainData);
    eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold
    System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation
}

From source file:ca.uottawa.balie.WekaLearner.java

License:Open Source License

/**
 * Creates a new classification algorithm.
 * //from w w  w.j  a v a 2 s .  co  m
 * @param pi_Attributes         Array of attributes
 * @param pi_ClassAttributes   Class attribute
 */
public WekaLearner(WekaAttribute[] pi_Attributes, String[] pi_ClassAttributes) {
    m_ClassAttributes = pi_ClassAttributes;
    m_AttributeLabels = new String[pi_Attributes.length];

    // Create the Weka Attributes
    m_WekaAttributes = new FastVector(pi_Attributes.length + 1);
    for (int i = 0; i != pi_Attributes.length; ++i) {
        if (pi_Attributes[i].Label().equals(CLASS_LABEL)) {
            throw new Error("Attribute cannot be named \"Class\" (reserved)");
        }
        m_AttributeLabels[i] = pi_Attributes[i].Label();
        if (pi_Attributes[i].IsNumeric()) {
            Attribute anAttribute = new Attribute(pi_Attributes[i].Label());
            m_WekaAttributes.addElement(anAttribute);
        } else {
            Attribute anAttribute = new Attribute(pi_Attributes[i].Label(), pi_Attributes[i].Values());
            m_WekaAttributes.addElement(anAttribute);
        }
    }

    // Create The class attribute 
    FastVector fvClass = new FastVector(pi_ClassAttributes.length);
    for (int i = 0; i != pi_ClassAttributes.length; ++i) {
        fvClass.addElement(pi_ClassAttributes[i]);
    }
    Attribute aClass = new Attribute(CLASS_LABEL, fvClass);

    m_WekaAttributes.addElement(aClass);

    m_TrainingSet = new Instances("", m_WekaAttributes, 0);
    m_TrainingSet.setClassIndex(aClass.index());

    m_TestingSet = new Instances("", m_WekaAttributes, 0);
    m_TestingSet.setClassIndex(aClass.index());

    // create an empty confusion matrix.. will be populated at evaluation time
    m_ConfusionMatrix = new double[0][0];
    m_bDoubleOnly = false;
}

From source file:ca.uottawa.balie.WekaLearner.java

License:Open Source License

public WekaLearner(FastVector attrsMerged, String[] attrlblMerged, String[] classList, Instances trainMerged) {
    m_AttributeLabels = attrlblMerged;//  w  w w .  java 2  s  .c o m
    m_ClassAttributes = classList;
    m_WekaAttributes = attrsMerged;
    m_TrainingSet = trainMerged;
    m_TestingSet = new Instances("", m_WekaAttributes, 0);
    m_TestingSet.setClassIndex(m_WekaAttributes.size() - 1);
}

From source file:categorization.SpectralWEKA.java

License:Open Source License

public void buildClusterer(ArrayList<String> seqDB, double[][] sm) {
    seqList = seqDB;/*ww w .j  a  v a 2  s.c om*/

    this.setSimMatrix(sm);

    Attribute seqString = new Attribute("sequence", (FastVector) null);
    FastVector attrInfo = new FastVector();
    attrInfo.addElement(seqString);
    Instances data = new Instances("data", attrInfo, 0);

    for (int i = 0; i < seqList.size(); i++) {
        Instance currentInst = new Instance(1);
        currentInst.setDataset(data);
        currentInst.setValue(0, seqList.get(i));
        data.add(currentInst);
    }

    try {
        buildClusterer(data);
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:cezeri.utils.FactoryInstance.java

public static Instances generateInstances(String relationName, int nCols) {
    CMatrix cm = CMatrix.getInstance().zeros(1, nCols);
    FastVector att = new FastVector();
    for (int i = 0; i < cm.getColumnNumber(); i++) {
        att.addElement(new Attribute("f" + (i + 1)));
    }/*  w w w .  j ava2  s  .  c o  m*/
    Instances ret = new Instances(relationName, att, cm.getRowNumber());
    for (int i = 0; i < cm.getRowNumber(); i++) {
        Instance ins = new Instance(cm.getColumnNumber());
        for (int j = 0; j < cm.getColumnNumber(); j++) {
            ins.setValue(j, cm.get2DArrayDouble()[i][j]);
        }
        ret.add(ins);
    }
    ret.setClassIndex(ret.numAttributes() - 1);
    return ret;
}