List of usage examples for weka.core.Instance.numAttributes()
public int numAttributes();
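Before the per-project examples, a minimal sketch of the basic call pattern. The dataset path "iris.arff" is a placeholder, and the snippet assumes a dataset loaded through the standard converter utilities:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; the path is a placeholder.
        Instances data = DataSource.read("iris.arff");
        Instance first = data.instance(0);
        // numAttributes() counts every attribute of the instance,
        // including the class attribute if one is set.
        for (int i = 0; i < first.numAttributes(); i++) {
            System.out.println(first.attribute(i).name() + " = " + first.value(i));
        }
    }
}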
From source file:edu.oregonstate.eecs.mcplan.ml.WekaGlue.java
License:Open Source License
public static double[] toDoubleArray(final Instance inst) {
    final double[] v = new double[inst.numAttributes()];
    for (int i = 0; i < inst.numAttributes(); ++i) {
        v[i] = inst.value(i);
    }
    return v;
}
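A hypothetical call site for the helper above, where data stands for any loaded Instances object:

// Hypothetical usage: flatten the first instance into a primitive array.
double[] features = WekaGlue.toDoubleArray(data.instance(0));
System.out.println(java.util.Arrays.toString(features));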
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts an instance to a feature vector excluding the class attribute.
 * @param instance the instance
 * @return a vector representation of the instance excluding the class attribute
 */
public static double[] instanceToDoubleArray(Instance instance) {
    double[] vector = new double[(instance.classIndex() != -1) ? instance.numAttributes() - 1
            : instance.numAttributes()];
    double[] instanceDoubleArray = instance.toDoubleArray();
    int attIdx = 0;
    for (int i = 0; i < vector.length; i++) {
        if (i == instance.classIndex()) {
            attIdx++;
        }
        vector[i] = instanceDoubleArray[attIdx++];
    }
    return vector;
}
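A hypothetical call site, showing that the class column is dropped once a class index is set:

// Hypothetical usage: with the last attribute as class, the returned
// vector has numAttributes() - 1 entries.
data.setClassIndex(data.numAttributes() - 1);
double[] features = WekaUtils.instanceToDoubleArray(data.instance(0));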
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts a single instance to SVM-light format.
 * @param data the Weka instance
 * @param labelFormat the SVM-light label format
 * @return the instance in SVM-light format
 */
public static String arffToSVMLight(Instance data, SVMLightLabelFormat labelFormat) {
    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) {
        throw new IllegalArgumentException(
                "SVM-light classification label format requires that the data contain only two classes.");
    }

    String str = "";
    String endline = System.getProperty("line.separator");

    int numAttributes = data.numAttributes();
    int classAttIdx = data.classIndex();

    // convert the instance label
    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) {
        str += (data.classValue() == 0) ? "-1" : "1";
    } else {
        str += data.classValue();
    }
    str += " ";

    // convert each feature
    for (int attIdx = 0; attIdx < numAttributes; attIdx++) {
        // skip the class attribute
        if (attIdx == classAttIdx)
            continue;
        str += (attIdx + 1) + ":" + data.value(attIdx) + " ";
    }

    // append the instance info string
    str += "#";
    str += endline;

    return str;
}
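A hypothetical driver that converts a whole dataset one instance at a time; it assumes the class index is already set (and binary for classification), and that SVMLightLabelFormat is imported from the same utility package:

// Hypothetical usage: each call emits one SVM-light record.
StringBuilder sb = new StringBuilder();
for (int i = 0; i < data.numInstances(); i++) {
    sb.append(WekaUtils.arffToSVMLight(data.instance(i), SVMLightLabelFormat.CLASSIFICATION));
}
String svmLightData = sb.toString();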
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param instance the instance to transform
 * @param labelIndices the indices of the label attributes
 * @return the transformed instance
 * @throws Exception if the transformation fails
 */
public Instance transformInstance(Instance instance, int[] labelIndices) throws Exception {
    Instance transformedInstance = RemoveAllLabels.transformInstance(instance, labelIndices);
    transformedInstance.setDataset(null);
    transformedInstance.insertAttributeAt(transformedInstance.numAttributes());
    transformedInstance.setDataset(transformedFormat);
    return transformedInstance;
}
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param folds
 * @param minAgents
 * @param maxAgents
 * @param originalDatasetPath
 * @param outputDir
 * @param scenario
 * @param logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath,
        String outputDir, String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {
            Instances originalData = this.getDataFromCSV(originalDatasetPath);
            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file, new File(
                        outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file, new File(
                        outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio,
                        scenario, logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }
                Instances copy = new Instances(trainData);
                copy.delete();
                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }
                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }

                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }
        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }

        logger.finest("Dataset for fold " + fold + " created.");
    }

    logger.finer("<-- splitDataset()");
}
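A hypothetical invocation of the splitter above; the paths, scenario name, and logger name are placeholders:

// Hypothetical usage: 10 folds, agent counts from 2 to 5.
DatasetSplitter splitter = new DatasetSplitter();
splitter.splitDataset(10, 2, 5, "data/original-dataset.csv", "output", "scenario-1",
        Logger.getLogger("DatasetSplitter"));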
From source file:etc.aloe.filters.AbstractRegexFilter.java
License:Open Source License
@Override
protected Instance process(Instance instance) throws Exception {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    String stringValue = instance.stringValue(stringAttributeIndex);
    NamedRegex[] regexFeatures = getRegexFeatures();

    int numOldValues = instance.numAttributes();
    int numNewFeatures = regexFeatures.length;
    if (countRegexLengths) {
        numNewFeatures = regexFeatures.length * 2;
    }
    double[] newValues = new double[numOldValues + numNewFeatures];

    // Copy all attributes from input to output
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Add simple nominal and numeric attributes directly
            if (instance.value(i) != 0.0) {
                newValues[i] = instance.value(i);
            }
        } else {
            if (instance.isMissing(i)) {
                newValues[i] = Utils.missingValue();
            } else {
                // If this is a string attribute, we have to first add
                // this value to the range of possible values, then add
                // its new internal index.
                if (outputFormatPeek().attribute(i).numValues() == 0) {
                    // Note that the first string value in a
                    // SparseInstance doesn't get printed.
                    outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug");
                }
                int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i));
                newValues[i] = newIndex;
            }
        }
    }

    for (int i = 0; i < regexFeatures.length; i++) {
        Pattern pattern = regexFeatures[i].getPattern();
        Matcher matches = pattern.matcher(stringValue);
        int count = 0;
        int maxLength = 0;
        while (matches.find()) {
            count++;
            int len = matches.group().length();
            if (len > maxLength) {
                maxLength = len;
            }
        }
        int index = numOldValues + i;
        if (countRegexLengths) {
            index = numOldValues + 2 * i;
        }
        newValues[index] = count;
        if (countRegexLengths) {
            newValues[numOldValues + 2 * i + 1] = maxLength;
        }
    }

    Instance result = new SparseInstance(instance.weight(), newValues);
    return result;
}
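Filters like this are normally applied through Weka's standard filter pipeline rather than by calling process() directly; a minimal sketch, where MyRegexFilter stands for some hypothetical concrete subclass:

// Hypothetical usage via weka.filters.Filter.useFilter.
AbstractRegexFilter filter = new MyRegexFilter(); // placeholder subclass
filter.setInputFormat(data);
Instances transformed = Filter.useFilter(data, filter);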
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/**
 * Converts the instance without normalization.
 *
 * @param instance the instance to convert
 * @param converted the list that receives the converted instance
 * @return the document length
 */
private double convertInstancewoDocNorm(Instance instance, ArrayList<Instance> converted) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }

    int numOldValues = instance.numAttributes();
    double[] newValues = new double[numOldValues + m_selectedTerms.size()];

    // Copy all attributes from input to output
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Add simple nominal and numeric attributes directly
            if (instance.value(i) != 0.0) {
                newValues[i] = instance.value(i);
            }
        } else {
            if (instance.isMissing(i)) {
                newValues[i] = Utils.missingValue();
            } else {
                // If this is a string attribute, we have to first add
                // this value to the range of possible values, then add
                // its new internal index.
                if (outputFormatPeek().attribute(i).numValues() == 0) {
                    // Note that the first string value in a
                    // SparseInstance doesn't get printed.
                    outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug");
                }
                int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i));
                newValues[i] = newIndex;
            }
        }
    }

    String stringValue = instance.stringValue(stringAttributeIndex);

    double docLength = 0;
    HashMap<String, Integer> termMatches = m_selectedTermsTrie.countNonoverlappingMatches(stringValue);
    for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
        String term = entry.getKey();
        int termIdx = m_selectedTermIndices.get(term);
        double matches = entry.getValue();
        if (!m_OutputCounts && matches > 0) {
            matches = 1;
        }
        if (matches > 0) {
            if (m_TFTransform == true) {
                matches = Math.log(matches + 1);
            }
            if (m_IDFTransform == true) {
                matches = matches * Math.log(m_NumInstances / (double) m_DocsCounts[termIdx]);
            }
            newValues[numOldValues + termIdx] = matches;
            docLength += matches * matches;
        }
    }

    Instance result = new SparseInstance(instance.weight(), newValues);
    converted.add(result);
    return Math.sqrt(docLength);
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/**
 * Normalizes the given instance to the average document length (only the
 * newly constructed attributes).
 *
 * @param inst the instance to normalize
 * @param docLength the document length
 * @throws Exception if the average document length is not set
 */
private void normalizeInstance(Instance inst, double docLength) throws Exception {
    if (docLength == 0) {
        return;
    }

    int numOldValues = getInputFormat().numAttributes();

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Normalize document vector
    for (int j = numOldValues; j < inst.numAttributes(); j++) {
        double val = inst.value(j) * m_AvgDocLength / docLength;
        inst.setValue(j, val);
    }
}
From source file:etc.aloe.filters.WordFeaturesExtractor.java
License:Open Source License
@Override
protected Instance process(Instance instance) throws Exception {
    if (selectedAttributeIndex < 0) {
        throw new IllegalStateException("String attribute not set");
    }

    int numOldValues = instance.numAttributes();
    int numNewFeatures = unigrams.size() + bigrams.size();
    double[] newValues = new double[numOldValues + numNewFeatures];

    // Copy all attributes from input to output
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (getInputFormat().attribute(i).type() != Attribute.STRING) {
            // Add simple nominal and numeric attributes directly
            if (instance.value(i) != 0.0) {
                newValues[i] = instance.value(i);
            }
        } else {
            if (instance.isMissing(i)) {
                newValues[i] = Utils.missingValue();
            } else {
                // If this is a string attribute, we have to first add
                // this value to the range of possible values, then add
                // its new internal index.
                if (outputFormatPeek().attribute(i).numValues() == 0) {
                    // Note that the first string value in a
                    // SparseInstance doesn't get printed.
                    outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug");
                }
                int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i));
                newValues[i] = newIndex;
            }
        }
    }

    String stringValue = instance.stringValue(selectedAttributeIndex);

    if (instance.isMissing(selectedAttributeIndex) == false) {
        List<String> words = tokenizeDocument(instance);
        Set<String> wordSet = new HashSet<String>(words);

        for (int i = 0; i < unigrams.size(); i++) {
            String unigram = unigrams.get(i);
            int count = 0;
            if (wordSet.contains(unigram)) {
                // Count the times the word is in the document
                for (int w = 0; w < words.size(); w++) {
                    if (words.get(w).equals(unigram)) {
                        count += 1;
                    }
                }
            }
            int featureIndex = numOldValues + i;
            newValues[featureIndex] = count;
        }

        for (int i = 0; i < bigrams.size(); i++) {
            Bigram bigram = bigrams.get(i);
            int count = bigram.getTimesInDocument(words);
            int featureIndex = numOldValues + unigrams.size() + i;
            newValues[featureIndex] = count;
        }
    }

    Instance result = new SparseInstance(instance.weight(), newValues);
    return result;
}
From source file:expshell.NeuralClass.java
public static void main(String[] args) throws Exception {
    Instance testInst = new Instance(5);
    testInst.setValue(0, 2.1);
    testInst.setValue(1, 3.1);
    testInst.setValue(2, 4.1);
    testInst.setValue(3, 5.1);
    // the class
    testInst.setValue(4, 0.0);
    Layer l = new Layer(5, testInst.numAttributes() - 1);
    l.computeNode(testInst);
}
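For reference, the snippet above targets the pre-3.7 Weka API, where Instance was a concrete class. From Weka 3.7 on, Instance is an interface and the equivalent construction uses DenseInstance; a sketch with the same attribute layout:

import weka.core.DenseInstance;
import weka.core.Instance;

// Weka 3.7+ equivalent of new Instance(5).
Instance testInst = new DenseInstance(5);
testInst.setValue(0, 2.1);
testInst.setValue(1, 3.1);
testInst.setValue(2, 4.1);
testInst.setValue(3, 5.1);
testInst.setValue(4, 0.0); // the class value
int numInputs = testInst.numAttributes() - 1; // exclude the class attribute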