Example usage for weka.core Instance attribute

Introduction

On this page you can find example usages of the attribute(int) method of weka.core.Instance.

Prototype

public Attribute attribute(int index);

Source Link

Document

Returns the attribute with the given index.

Usage

From source file:dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java

License:Open Source License

/**
 * Runs a 10-fold cross-validation with a {@code LinearRegression} base classifier for every
 * given dataset and writes the clamped predictions, one score per line, to
 * {@code OUTPUT_DIR/<mode>/<dataset>.csv}.
 *
 * <p>For each dataset an ID attribute is first added via {@code AddID}, so that after the
 * data has been shuffled each prediction can be written back to the position of its
 * original instance.
 *
 * @param mode     selects the sub-directory (lower-cased {@code mode.toString()}) below
 *                 {@code MODELS_DIR} and {@code OUTPUT_DIR}
 * @param datasets the datasets to process; each is read from
 *                 {@code MODELS_DIR/<mode>/<dataset>.arff}
 * @throws Exception if the data cannot be loaded or any Weka / file operation fails
 */
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator (seeded with the current time)
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Common path prefix of the input file and of the ID-augmented copy
        String basePath = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString();
        String location = basePath + "-plusIDs.arff";

        // Add IDs to the instances
        AddID.main(new String[] { "-i", basePath + ".arff", "-o", location });

        Instances data = DataSource.read(location);

        if (data == null) {
            throw new IOException("Could not load data from: " + location);
        }

        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter (drops the first attribute, i.e. the added ID)
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);

        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate
            // NOTE(review): this evaluates the inner classifier rather than filteredClassifier,
            // on test data that still carries the ID attribute, and eval is not read again in
            // this method - confirm this is intended.
            eval.evaluateModel(classifier, test);

            // Add predictions
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier

            Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores
        double[] scores = new double[predictedData.numInstances()];

        // The value read back as the prediction is the second-to-last attribute
        int valueIdx = predictedData.numAttributes() - 2;

        for (Instance predInst : predictedData) {
            // Attribute 0 holds the ID; subtracting 1 turns it into an array index
            int id = (int) predInst.value(predInst.attribute(0)) - 1;

            double value = predInst.value(predInst.attribute(valueIdx));

            scores[id] = value;

            // Limit to interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (double score : scores) {
            sb.append(score).append(LF);
        }

        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}

From source file:edu.illinois.cs.cogcomp.saul.learn.SaulWekaWrapper.java

License:Open Source License

/**
 * Builds a WEKA {@code Instance} from an {@code LBJavaInstance}.
 *
 * <p>Attribute 0 is the label; the attribute for feature index {@code k} lives at position
 * {@code k + 1}. Every nominal non-label attribute starts out as {@code "0"}; discrete
 * features present in the example are then set to {@code "1"} and all other present features
 * receive their value from {@code instance.featureValues}. Inconsistencies (a misaligned
 * attribute/feature pair, more than one label, a wrong label name) are reported on
 * {@code System.err} and terminate the JVM with exit code 1.
 *
 * @param instance the example to convert
 * @return the new instance, attached to {@code wekaInstances}; its class is marked missing
 *         when the example carries no label
 */
private Instance makeInstance(LBJavaInstance instance) {
    final int attributeCount = attributeInfo.size();

    // Create the instance and register it with our dataset 'wekaInstances'
    Instance result = new Instance(attributeCount);
    result.setDataset(wekaInstances);

    // Default every nominal feature attribute to "0" (= feature not used in this example)
    for (int a = 1; a < attributeCount; a++) {
        if (result.attribute(a).isNominal()) {
            result.setValue(a, "0");
        }
    }

    // Fill in the features that are present. The feature list does not contain the label
    // (attribute 0), hence the +1 shift from feature index to attribute index.
    for (int pos = 0; pos < instance.featureIndices.length; ++pos) {
        int featureKey = instance.featureIndices[pos];
        Feature feature = lexicon.lookupKey(featureKey);

        // Features missing from the lexicon are skipped; this may occur in the test set
        if (feature == null) {
            continue;
        }

        int attIndex = featureKey + 1;
        Attribute attribute = (Attribute) attributeInfo.elementAt(attIndex);

        // The attribute must carry the same name as the feature
        boolean aligned = attribute.name().equals(feature.toString());
        if (!aligned) {
            System.err.println(
                    "WekaWrapper: Error - makeInstance encountered a misaligned " + "attribute-feature pair.");
            System.err.println(
                    "  " + attribute.name() + " and " + feature.toString() + " should have been identical.");
            new Exception().printStackTrace();
            System.exit(1);
        }

        if (feature.isDiscrete()) {
            // discrete feature used in this example -> "1"
            result.setValue(attIndex, "1");
        } else {
            result.setValue(attIndex, instance.featureValues[pos]);
        }
    }

    // Label handling: no label -> class missing, several labels -> fatal, one label -> set it
    final int labelCount = instance.labelIndices.length;

    if (labelCount == 0) {
        result.setClassMissing();
        return result;
    }

    if (labelCount > 1) {
        System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + "value, ");
        new Exception().printStackTrace();
        System.exit(1);
        return result;
    }

    Feature label = labelLexicon.lookupKey(instance.labelIndices[0]);

    // The label's generating classifier must match the name of attribute 0
    String labelAttributeName = ((Attribute) attributeInfo.elementAt(0)).name();
    if (!label.getGeneratingClassifier().equals(labelAttributeName)) {
        System.err.println("WekaWrapper: Error - makeInstance found the wrong label name.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    if (label.isDiscrete()) {
        result.setValue(0, label.getStringValue());
    } else {
        result.setValue(0, instance.labelValues[0]);
    }

    return result;
}

From source file:en_deep.mlprocess.manipulation.featmodif.FeatureModifierFilter.java

License:Open Source License

/**
 * Converts a single input instance to the output format and appends the result to the
 * output queue.
 *
 * <p>Attributes outside {@code m_Columns} are copied value-by-value. For attributes inside
 * the range, {@code getAttributeOutputValue} fills the corresponding output slots (numeric
 * values in {@code vals}, pending string values in {@code stringVals}) and returns how many
 * output slots it used.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    double[] vals = new double[outputFormatPeek().numAttributes()];
    String[] stringVals = new String[vals.length];
    int attSoFar = 0; // index of the next free slot in the output arrays

    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        Attribute att = instance.attribute(j);
        if (!m_Columns.isInRange(j)) {
            vals[attSoFar] = instance.value(j);
            attSoFar++;
        } else {
            // store new string values, make double values "missing" for now (if some string
            // values are missing, the double values will remain missing)
            attSoFar += getAttributeOutputValue(att, instance.value(j), vals, stringVals, attSoFar);
        }
    }

    // Keep the sparse / dense representation of the input instance
    final Instance inst;
    if (instance instanceof SparseInstance) {
        inst = new SparseInstance(instance.weight(), vals);
    } else {
        inst = new DenseInstance(instance.weight(), vals);
    }

    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());

    // add new string values to the output data set and to the instance
    for (int i = 0; i < stringVals.length; ++i) {
        if (stringVals[i] != null) {
            vals[i] = inst.dataset().attribute(i).addStringValue(stringVals[i]);
        }
    }
    inst.replaceMissingValues(vals);

    // Re-attach to the output format before queueing (kept from the original sequence)
    inst.setDataset(getOutputFormat());
    push(inst);
}

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Copies the given instance into the output format, replaces missing nominal and string
 * values in the selected columns by the replacement label {@code m_ReplVal}, and appends
 * the result to the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // Duplicate the input instance, preserving its sparse / dense representation
    final boolean sparse = instance instanceof SparseInstance;
    final Instance copy = sparse
            ? new SparseInstance(instance.weight(), instance.toDoubleArray())
            : new DenseInstance(instance.weight(), instance.toDoubleArray());

    // Carry over the existing string values as well
    copy.setDataset(getOutputFormat());
    // beware of weird behavior of copyValues (see its source)!
    copyValues(copy, false, instance.dataset(), getOutputFormat());
    copy.setDataset(getOutputFormat());

    // Collect, per attribute, the numeric code that stands in for a missing value
    double[] replacements = instance.toDoubleArray();

    for (int col = 0; col < getInputFormat().numAttributes(); col++) {
        if (!m_Columns.isInRange(col) || !instance.isMissing(col)) {
            continue;
        }

        Attribute inputAtt = instance.attribute(col);
        Attribute outputAtt = copy.dataset().attribute(col);

        if (inputAtt.isNominal()) {
            // look up the replacement label in the output nominal attribute
            replacements[col] = outputAtt.indexOfValue(m_ReplVal);
        } else if (inputAtt.isString()) {
            // register the replacement label as a new string value
            replacements[col] = outputAtt.addStringValue(m_ReplVal);
        }
    }

    // Write the collected replacements into the missing slots
    copy.replaceMissingValues(replacements);

    push(copy);
}

From source file:irisdriver.IrisDriver.java

/**
 * Interactive driver: asks the user for a model file and a test-set file, classifies every
 * instance of the test set with the {@code Iris} model, prints each predicted label to
 * standard output and writes it to {@code D:\output_file.txt}.
 *
 * <p>If either file dialog is cancelled, nothing further happens. Any exception is logged
 * at {@code SEVERE} level.
 *
 * @param args the command line arguments (not read by this method)
 */
public static void main(String[] args) {
    try {
        Hashtable<String, String> values = new Hashtable<String, String>();

        // Loading the model
        JFileChooser chooserModel = new JFileChooser();
        chooserModel.setCurrentDirectory(new java.io.File("."));
        chooserModel.setDialogTitle("Choose the model");
        chooserModel.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserModel.setAcceptAllFileFilterUsed(true);

        if (chooserModel.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
            return;
        }

        File filePathModel = chooserModel.getSelectedFile();
        String pathModel = filePathModel.getPath();
        Iris irisModel = new Iris(pathModel);

        // Loading the testing dataset
        JFileChooser chooserTestSet = new JFileChooser();
        chooserTestSet.setDialogTitle("Choose TEST SET");
        chooserTestSet.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserTestSet.setAcceptAllFileFilterUsed(true);

        if (chooserTestSet.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
            return;
        }

        File filePathTestSet = chooserTestSet.getSelectedFile();
        String pathTestSet = filePathTestSet.getPath();

        // Writing the output; the writer is closed in the finally block so that the file
        // handle is released even when loading or classification fails.
        BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\output_file.txt"));
        try {
            // Transforming the data set into pairs attribute-value
            ConverterUtils.DataSource unlabeledSource = new ConverterUtils.DataSource(pathTestSet);
            Instances unlabeledData = unlabeledSource.getDataSet();
            if (unlabeledData.classIndex() == -1) {
                unlabeledData.setClassIndex(unlabeledData.numAttributes() - 1);
            }

            for (int i = 0; i < unlabeledData.numInstances(); i++) {
                Instance ins = unlabeledData.instance(i);

                // ins.numAttributes() - 1 --> not to include the label
                for (int j = 0; j < ins.numAttributes() - 1; j++) {
                    String attrib = ins.attribute(j).name();
                    double val = ins.value(ins.attribute(j));

                    values.put(attrib, String.valueOf(val));
                }

                String predictedLabel = irisModel.classifySpecies(values);
                System.out.println("Classification: " + predictedLabel);
                values.clear();

                // Writing the results to the text file
                // NOTE(review): no line separator is written between labels, so all
                // results end up on a single line - confirm this is intended.
                writer.write("The label is: " + predictedLabel);
            }

            writer.flush();
        } finally {
            writer.close();
        }

    } catch (Exception ex) {
        Logger.getLogger(IrisDriver.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Determines which of the two subsets of this binary split the instance falls into.
 *
 * <p>For a nominal split attribute the instance goes to subset 0 when its value index
 * equals the (integer-truncated) split point and to subset 1 otherwise. For any other
 * attribute it goes to subset 0 when its value is smaller than or equal to the split point
 * (as decided by {@code Utils.smOrEq}) and to subset 1 otherwise.
 *
 * @param instance the instance to assign
 * @return 0 or 1, or -1 if the instance's value for the split attribute is missing
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {

    // A missing value cannot be assigned to a single subset
    if (instance.isMissing(m_attIndex)) {
        return -1;
    }

    if (instance.attribute(m_attIndex).isNominal()) {
        boolean matchesSplitValue = (int) m_splitPoint == (int) instance.value(m_attIndex);
        return matchesSplitValue ? 0 : 1;
    }

    return Utils.smOrEq(instance.value(m_attIndex), m_splitPoint) ? 0 : 1;
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Returns index of subset instance is assigned to. Returns -1 if instance
 * is assigned to more than one subset./*from  w  ww  .  j  av a2s  .  com*/
 * 
 * @exception Exception
 *                if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_attIndex))
        return -1;
    else {
        if (instance.attribute(m_attIndex).isNominal())
            return (int) instance.value(m_attIndex);
        else if (Utils.smOrEq(instance.value(m_attIndex), m_splitPoint))
            return 0;
        else
            return 1;
    }
}

From source file:j48.GraftSplit.java

License:Open Source License

/**
 * Determines which of the two subsets of this split the instance belongs to.
 *
 * @param instance the instance for which to determine the subset
 * @return 0 or 1 indicating the subset this instance belongs to, or -1 if the instance's
 *         value for the split attribute is missing
 */
public int whichSubset(Instance instance) {

    if (instance.isMissing(m_attIndex)) {
        return -1;
    }

    final boolean inFirstSubset;
    if (instance.attribute(m_attIndex).isNominal()) {
        // for nominal attributes m_splitPoint is the "=" value, everything else is "!="
        inFirstSubset = instance.value(m_attIndex) == m_splitPoint;
    } else {
        inFirstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
    }

    return inFirstSubset ? 0 : 1;
}

From source file:kea.KEAPhraseFilter.java

License:Open Source License

/** 
 * Converts an instance by removing all non-alphanumeric characters
 * from its string attribute values./*from   www .  j  ava2  s .  co  m*/
 */
private void convertInstance(Instance instance) throws Exception {

    double[] instVals = new double[instance.numAttributes()];

    for (int i = 0; i < instance.numAttributes(); i++) {
        if (!instance.attribute(i).isString() || instance.isMissing(i)) {
            instVals[i] = instance.value(i);
        } else {
            if (!m_SelectCols.isInRange(i)) {
                int index = getOutputFormat().attribute(i).addStringValue(instance.stringValue(i));
                instVals[i] = (double) index;
                continue;
            }
            String str = instance.stringValue(i);
            StringBuffer resultStr = new StringBuffer();
            int j = 0;
            boolean phraseStart = true;
            boolean seenNewLine = false;
            boolean haveSeenHyphen = false;
            boolean haveSeenSlash = false;
            while (j < str.length()) {
                boolean isWord = false;
                boolean potNumber = false;
                int startj = j;
                while (j < str.length()) {
                    char ch = str.charAt(j);
                    if (Character.isLetterOrDigit(ch)) {
                        potNumber = true;
                        if (Character.isLetter(ch)) {
                            isWord = true;
                        }
                        j++;
                    } else if ((!m_DisallowInternalPeriods && (ch == '.')) || (ch == '@') || (ch == '_')
                            || (ch == '&') || (ch == '/') || (ch == '-')) {
                        if ((j > 0) && (j + 1 < str.length()) && Character.isLetterOrDigit(str.charAt(j - 1))
                                && Character.isLetterOrDigit(str.charAt(j + 1))) {
                            j++;
                        } else {
                            break;
                        }
                    } else if (ch == '\'') {
                        if ((j > 0) && Character.isLetterOrDigit(str.charAt(j - 1))) {
                            j++;
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
                if (isWord == true) {
                    if (!phraseStart) {
                        if (haveSeenHyphen) {
                            resultStr.append('-');
                        } else if (haveSeenSlash) {
                            resultStr.append('/');
                        } else {
                            resultStr.append(' ');
                        }
                    }
                    resultStr.append(str.substring(startj, j));
                    if (j == str.length()) {
                        break;
                    }
                    phraseStart = false;
                    seenNewLine = false;
                    haveSeenHyphen = false;
                    haveSeenSlash = false;
                    if (Character.isWhitespace(str.charAt(j))) {
                        if (str.charAt(j) == '\n') {
                            seenNewLine = true;
                        }
                    } else if (str.charAt(j) == '-') {
                        haveSeenHyphen = true;
                    } else if (str.charAt(j) == '/') {
                        haveSeenSlash = true;
                    } else {
                        phraseStart = true;
                        resultStr.append('\n');
                    }
                    j++;
                } else if (j == str.length()) {
                    break;
                } else if (str.charAt(j) == '\n') {
                    if (seenNewLine) {
                        if (phraseStart == false) {
                            resultStr.append('\n');
                            phraseStart = true;
                        }
                    } else if (potNumber) {
                        if (phraseStart == false) {
                            phraseStart = true;
                            resultStr.append('\n');
                        }
                    }
                    seenNewLine = true;
                    j++;
                } else if (Character.isWhitespace(str.charAt(j))) {
                    if (potNumber) {
                        if (phraseStart == false) {
                            phraseStart = true;
                            resultStr.append('\n');
                        }
                    }
                    j++;
                } else {
                    if (phraseStart == false) {
                        resultStr.append('\n');
                        phraseStart = true;
                    }
                    j++;
                }
            }
            int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
            instVals[i] = (double) index;
        }
    }
    Instance inst = new Instance(instance.weight(), instVals);
    inst.setDataset(getOutputFormat());
    push(inst);
}

From source file:kea.NumbersFilter.java

License:Open Source License

/**
 * Converts an instance by replacing every number in its string attribute values with a
 * phrase boundary ({@code " \n "}) and pushes the result onto the output queue.
 *
 * <p>The string is split on space, tab and newline (the delimiters themselves are kept).
 * Any non-delimiter token without a single letter counts as a number. Non-string and
 * missing values are copied unchanged.
 *
 * @param instance the instance to convert
 * @throws Exception if something goes wrong
 */
private void convertInstance(Instance instance) throws Exception {

    final int attributeCount = instance.numAttributes();
    double[] converted = new double[attributeCount];

    for (int att = 0; att < attributeCount; att++) {
        boolean copyUnchanged = !instance.attribute(att).isString() || instance.isMissing(att);
        if (copyUnchanged) {
            converted[att] = instance.value(att);
            continue;
        }

        StringBuffer rewritten = new StringBuffer();
        StringTokenizer tokens = new StringTokenizer(instance.stringValue(att), " \t\n", true);

        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();

            // A token is kept as-is if it contains at least one letter ...
            boolean hasLetter = false;
            for (char c : token.toCharArray()) {
                if (Character.isLetter(c)) {
                    hasLetter = true;
                    break;
                }
            }

            // ... or if it is one of the delimiters returned by the tokenizer
            boolean isDelimiter = token.equals(" ") || token.equals("\t") || token.equals("\n");

            if (hasLetter || isDelimiter) {
                rewritten.append(token);
            } else {
                // everything else is considered to be a number
                rewritten.append(" \n ");
            }
        }

        int stringIndex = getOutputFormat().attribute(att).addStringValue(rewritten.toString());
        converted[att] = (double) stringIndex;
    }

    Instance result = new Instance(instance.weight(), converted);
    result.setDataset(getOutputFormat());
    push(result);
}