Example usage for weka.core Instances insertAttributeAt

List of usage examples for weka.core Instances insertAttributeAt

Introduction

On this page you can find example usage for weka.core Instances insertAttributeAt.

Prototype



public void insertAttributeAt(Attribute att, int position) 

Document

Inserts an attribute at the given position (0 to numAttributes()) and sets all values to be missing.
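
The following is a minimal standalone sketch, not taken from the sources listed below, showing the documented behavior: the attribute is appended at position numAttributes(), every existing instance gets a missing value for it, and the new column is then filled in per instance. It assumes the Weka 3.7+ API (ArrayList-based Instances constructor, DenseInstance).

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InsertAttributeAtSketch {
    public static void main(String[] args) {
        // Build a tiny dataset with a single numeric attribute and two rows.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x"));
        Instances data = new Instances("demo", atts, 2);
        data.add(new DenseInstance(1.0, new double[] { 1.5 }));
        data.add(new DenseInstance(1.0, new double[] { 2.5 }));

        // Append a nominal attribute; all existing rows get a missing value for it.
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        data.insertAttributeAt(new Attribute("label", labels), data.numAttributes());

        // Fill in the new column afterwards.
        int newIdx = data.numAttributes() - 1;
        for (int i = 0; i < data.numInstances(); i++) {
            data.instance(i).setValue(newIdx, "yes");
        }
        System.out.println(data);
    }
}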

Usage

From source file:distributed.core.DistributedUtils.java

License:Open Source License

public static Instances makeHeaderWithSummaryAtts(Instances denormalized, boolean treatZerosAsMissing) {
    Instances header = new Instances(denormalized, 0);

    for (int i = 0; i < denormalized.numAttributes(); i++) {
        AttributeStats stats = denormalized.attributeStats(i);
        if (denormalized.attribute(i).isNumeric()) {
            NumericStats ns = new NumericStats(denormalized.attribute(i).name());
            if (!treatZerosAsMissing) {
                ns.getStats()[ArffSummaryNumericMetric.MIN.ordinal()] = stats.numericStats.min;
                ns.getStats()[ArffSummaryNumericMetric.MAX.ordinal()] = stats.numericStats.max;
                ns.getStats()[ArffSummaryNumericMetric.COUNT.ordinal()] = stats.numericStats.count;
                ns.getStats()[ArffSummaryNumericMetric.SUM.ordinal()] = stats.numericStats.sum;
                ns.getStats()[ArffSummaryNumericMetric.SUMSQ.ordinal()] = stats.numericStats.sumSq;
                ns.getStats()[ArffSummaryNumericMetric.MISSING.ordinal()] = stats.missingCount;

                ns.computeDerived();
            } else {
                ns = getNumericAttributeStatsSparse(denormalized, i);
            }

            Attribute newAtt = ns.makeAttribute();
            header.insertAttributeAt(newAtt, header.numAttributes());
        } else if (denormalized.attribute(i).isNominal()) {
            NominalStats nom = new NominalStats(denormalized.attribute(i).name());
            nom.setNumMissing(stats.missingCount);

            double[] labelFreqs = stats.nominalWeights;
            for (int j = 0; j < denormalized.attribute(i).numValues(); j++) {
                nom.add(denormalized.attribute(i).value(j), labelFreqs[j]);
            }

            Attribute newAtt = nom.makeAttribute();
            header.insertAttributeAt(newAtt, header.numAttributes());
        }
    }

    return header;
}
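
A hedged usage sketch for the method above; train is a placeholder for an already loaded Instances object, and the distributedWekaBase classes referenced in the source are assumed to be on the classpath.

// Appends one summary ("stats") attribute per original attribute to a header-only copy.
Instances headerWithSummary = DistributedUtils.makeHeaderWithSummaryAtts(train, false);
System.out.println(headerWithSummary); // zero instances, original plus summary attributes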

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Converts the instances in the given dataset to binary, setting the specified labels to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that data is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex();

    // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be N
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}
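
A hedged usage sketch for the method above; the file name and class value are placeholders, not part of the source.

// Placeholders: "vowel.arff" and "positive" are illustrative values only.
Instances data = new Instances(new java.io.BufferedReader(new java.io.FileReader("vowel.arff")));
data.setClassIndex(data.numAttributes() - 1);

// Collapse the problem into "positive" vs. everything else; afterwards the class
// attribute is nominal {Y, N} and the relation name has "-positive" appended.
WekaUtils.convertMulticlassToBinary(data, "positive");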

From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java

License:Open Source License

/**
 * Adds a y attribute without giving it values.
 */
public static Instances addYforWeka(Instances xInsts) {

    // add another column for y
    int n = xInsts.numAttributes();
    xInsts.insertAttributeAt(new Attribute(Integer.toString(n)), n);

    // last attribute is y value, the class 'label'
    xInsts.setClassIndex(n);

    return xInsts;
}
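
A hedged sketch of how the returned dataset might be used; y is an assumed array of regression targets and is not part of the source.

// xInsts holds only feature columns; addYforWeka appends one all-missing
// attribute at the end and marks it as the class.
Instances withY = RegressionUtils.addYforWeka(xInsts);
for (int i = 0; i < withY.numInstances(); i++) {
    withY.instance(i).setClassValue(y[i]); // y: assumed double[] of target values
}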

From source file:elh.eus.absa.WekaWrapper.java

License:Open Source License

/**
 *      Train one vs all models over the given training data.
 *
 * @param modelpath directory to store each model for the one vs. all method
 * @param prefix prefix the models should have (each model will have the name of its class appended)
 * @throws Exception
 */
public void trainOneVsAll(String modelpath, String prefix) throws Exception {
    Instances orig = new Instances(traindata);
    Enumeration<Object> classValues = traindata.classAttribute().enumerateValues();
    String classAtt = traindata.classAttribute().name();
    while (classValues.hasMoreElements()) {
        String v = (String) classValues.nextElement();
        System.err.println("trainer onevsall for class " + v + " classifier");
        //needed because of weka's sparse data format problems THIS IS TROUBLE! ...
        if (v.equalsIgnoreCase("dummy")) {
            continue;
        }
        // copy instances and set the same class value
        Instances ovsa = new Instances(orig);
        //create a new class attribute and declare its values
        ArrayList<String> classVal = new ArrayList<String>();
        classVal.add("dummy"); //needed because of weka's sparse data format problems...
        classVal.add(v);
        classVal.add("UNKNOWN");
        ovsa.insertAttributeAt(new Attribute(classAtt + "2", classVal), ovsa.numAttributes());
        //change the label of every instance that does not have the current class value to "UNKNOWN"
        for (int i = 0; i < ovsa.numInstances(); i++) {
            Instance inst = ovsa.instance(i);
            String instClass = inst.stringValue(ovsa.attribute(classAtt).index());
            if (instClass.equalsIgnoreCase(v)) {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), v);
            } else {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), "UNKNOWN");
            }
        }
        //delete the old class attribute and set the new.         
        ovsa.setClassIndex(ovsa.attribute(classAtt + "2").index());
        ovsa.deleteAttributeAt(ovsa.attribute(classAtt).index());
        ovsa.renameAttribute(ovsa.attribute(classAtt + "2").index(), classAtt);
        ovsa.setClassIndex(ovsa.attribute(classAtt).index());

        //build the classifier, crossvalidate and store the model
        setTraindata(ovsa);
        saveModel(modelpath + File.separator + prefix + "_" + v + ".model");
        setTestdata(ovsa);
        testModel(modelpath + File.separator + prefix + "_" + v + ".model");

        System.err.println("trained onevsall " + v + " classifier");
    }

    setTraindata(orig);
}

From source file:es.jarias.FMC.ClassCompoundTransformation.java

License:Open Source License

/**
 * @param mlData
 * @return the transformed instances
 * @throws Exception
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    data = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();

    Instances newData = null;

    // This must be different in order to combine ALL class states, not only existing ones.
    // gather distinct label combinations
    // ASSUME CLASSES ARE BINARY

    ArrayList<LabelSet> labelSets = new ArrayList<LabelSet>();

    double[] dblLabels = new double[numLabels];
    double nCombinations = Math.pow(2, numLabels);

    for (int i = 0; i < nCombinations; i++) {
        for (int l = 0; l < numLabels; l++) {
            int digit = (int) Math.pow(2, numLabels - 1 - l);
            dblLabels[l] = (digit & i) / digit;
        }

        LabelSet labelSet = new LabelSet(dblLabels);
        labelSets.add(labelSet);
    }

    //        for (int i = 0; i < numInstances; i++) {
    //            // construct labelset
    //            double[] dblLabels = new double[numLabels];
    //            for (int j = 0; j < numLabels; j++) {
    //                int index = labelIndices[j];
    //                dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index)));
    //            }
    //            LabelSet labelSet = new LabelSet(dblLabels);
    //
    //            // add labelset if not already present
    //            labelSets.add(labelSet);
    //        }

    // create class attribute
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    newClass = new Attribute("class", classValues);

    //        for (String s : classValues)
    //        {
    //           System.out.print(s+", ");
    //           
    //        }
    //        System.out.println();

    // remove all labels
    newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values
    for (int i = 0; i < newData.numInstances(); i++) {
        //System.out.println(newData.instance(i).toString());
        String strClass = "";
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index));
        }
        //System.out.println(strClass);
        newData.instance(i).setClassValue(strClass);
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}

From source file:etc.aloe.filters.AbstractRegexFilter.java

License:Open Source License

@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (stringAttributeName == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    Instances outputFormat = new Instances(inputFormat, 0);

    Attribute stringAttr = inputFormat.attribute(stringAttributeName);
    stringAttributeIndex = stringAttr.index();

    //Add the new columns. There is one for each regex feature.
    NamedRegex[] regexFeatures = getRegexFeatures();
    for (int i = 0; i < regexFeatures.length; i++) {
        String name = regexFeatures[i].getName();
        Attribute attr = new Attribute(name);
        outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());

        if (countRegexLengths) {
            name = name + "_L";
            attr = new Attribute(name);
            outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());
        }

    }

    return outputFormat;
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (getStringAttribute() == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    stringAttributeIndex = inputFormat.attribute(getStringAttribute()).index();

    inputFormat = getInputFormat();
    //This generates m_selectedTerms and m_DocsCounts
    int[] docsCountsByTermIdx = determineDictionary(inputFormat);

    //Initialize the output format to be just like the input
    Instances outputFormat = new Instances(inputFormat, 0);

    //Set up the map from attr index to document frequency
    m_DocsCounts = new int[m_selectedTerms.size()];
    //And add the new attributes
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        int attrIdx = outputFormat.numAttributes();
        int docsCount = docsCountsByTermIdx[i];
        m_DocsCounts[i] = docsCount;

        outputFormat.insertAttributeAt(new Attribute(m_Prefix + m_selectedTerms.get(i)), attrIdx);
    }

    return outputFormat;
}

From source file:etc.aloe.filters.WordFeaturesExtractor.java

License:Open Source License

private Instances generateOutputFormat(Instances inputFormat) {
    Instances outputFormat = new Instances(inputFormat, 0);

    //Add the new columns. There is one for each unigram and each bigram.
    for (int i = 0; i < unigrams.size(); i++) {
        String name = "uni_" + unigrams.get(i);
        Attribute attr = new Attribute(name);
        outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());
    }

    for (int i = 0; i < bigrams.size(); i++) {
        String name = "bi_" + bigrams.get(i);
        Attribute attr = new Attribute(name);
        outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());
    }

    return outputFormat;
}

From source file:examples.TrainerFrame.java

private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    //This is a temporary fix to make it appear like it's finished

    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");
    //Generate Target Features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    //Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    //Load target ARFF file
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(3);
    pBar.repaint();
    //Load other ARFF file
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector();
    typeList.add("target");
    typeList.add("other");
    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();
    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(6);
    pBar.repaint();
    //Train a classifier
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);
    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");

}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided into as many partitions as there are distinct label combinations.
 * @throws Exception
 * 
 * @param file
 *          the .arff file
 * */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances containing only the attributes.
     * In contrast, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }

    /*    attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It starts from 8, not 7, because Remove.setAttributeIndices expects 1-based indices.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
        //System.out.println(partitions[i]);
    }
    // partitions now contains only attributes

    /*
     * delete the attributes from partitionsWithCLasses
     */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }

    /*    labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels.
     * Remove.setAttributeIndices expects 1-based indices.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            /*            int k=0;
                        for (int j = 0; j < assignments.length; j++) {
                           System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                           k++;
                           System.out.println();
                    
                        }
                        System.out.println();*/

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids at this stage hold only attributes. To continue, we need to provide them with the labels
             * we removed earlier. But first, open up positions (new attributes) for the labels.
             */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}