Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

In this page you can find the example usage for weka.core Instances numAttributes.

Prototype


public int numAttributes()

Source Link

Document

Returns the number of attributes.

Usage

From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java

License:Open Source License

/**
 * Builds the dataset header (attribute declarations plus the nominal class
 * attribute "Defecto") for the requested features.
 *
 * <p>When {@code features} is {@code null} every attribute is declared.
 * Otherwise each requested name is normalised (first character lower-cased,
 * spaces removed) and an attribute is declared only when its feature name
 * appears in the normalised list.
 *
 * @param features a List of features, or {@code null} to include all of them
 * @return header with features headers; its class index is the last attribute
 */
public Instances getHeader(List<String> features) {
    int capacity = 100000;
    boolean includeAll = features == null;

    List<String> featuresCopy = null;
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> defect = new ArrayList<String>();

    defect.add("true");
    defect.add("false");

    if (!includeAll) {
        featuresCopy = new ArrayList<String>(features);

        for (int i = 0; i < featuresCopy.size(); i++) {
            String name = featuresCopy.get(i);
            // charAt(0)/substring(1) would throw on a null or empty name;
            // such entries are left untouched instead of aborting the call.
            if (name == null || name.isEmpty()) {
                continue;
            }
            char first = Character.toLowerCase(name.charAt(0));
            featuresCopy.set(i, (first + name.substring(1)).replaceAll(" ", ""));
        }
    }

    String[] standardHead = ftStandard.getHead();
    for (int j = 0; j < standardHead.length; j++) {
        if (includeAll || featuresCopy.contains(standardHead[j])) {
            atts.add(new Attribute(standardHead[j]));
        }
    }

    // The membership test uses the saliency extractor's own header, i.e. the
    // same array that bounds the loop and names the attribute. Indexing the
    // non-saliency header here could run out of bounds if the lengths differ.
    String[] standardSaliencyHead = ftStandardSaliency.getHead();
    for (int j = 0; j < standardSaliencyHead.length; j++) {
        if (includeAll || featuresCopy.contains(standardSaliencyHead[j] + "(S)")) {
            atts.add(new Attribute(standardSaliencyHead[j] + "(S)"));
        }
    }

    String[] haralickHead = ftHaralick.getHead();
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < haralickHead.length; i++) {
            if (includeAll || featuresCopy.contains(haralickHead[i])) {
                atts.add(new Attribute(haralickHead[i] + "_mean" + j));
            }
        }
    }

    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < haralickHead.length; i++) {
            if (includeAll || featuresCopy.contains(haralickHead[i])) {
                atts.add(new Attribute(haralickHead[i] + "_range" + j));
            }
        }
    }

    // Same consistency rule as above: test against the saliency header.
    String[] haralickSaliencyHead = ftHaralickSaliency.getHead();
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < haralickSaliencyHead.length; i++) {
            if (includeAll || featuresCopy.contains(haralickSaliencyHead[i] + "(S)")) {
                atts.add(new Attribute(haralickSaliencyHead[i] + "_mean" + j + "(S)"));
            }
        }
    }

    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < haralickSaliencyHead.length; i++) {
            if (includeAll || featuresCopy.contains(haralickSaliencyHead[i] + "(S)")) {
                atts.add(new Attribute(haralickSaliencyHead[i] + "_range" + j + "(S)"));
            }
        }
    }

    // LBP entries are requested as "<head>_<j>" but declared as "<head>(<j>)".
    for (int j = 1; j < 60; j++) {
        if (includeAll || featuresCopy.contains(ftLbp.getHead() + "_" + j)) {
            atts.add(new Attribute(ftLbp.getHead() + "(" + j + ")"));
        }
    }

    for (int j = 1; j < 60; j++) {
        if (includeAll || featuresCopy.contains(ftLbpSaliency.getHead() + "_" + j + "(S)")) {
            atts.add(new Attribute(ftLbpSaliency.getHead() + "(" + j + ")(S)"));
        }
    }

    atts.add(new Attribute("Defecto", defect));

    // Capacity is the number of instances.
    Instances header = new Instances("NuevaInstancia", atts, capacity);
    // Set the class attribute (the nominal "Defecto" attribute added last).
    header.setClassIndex(header.numAttributes() - 1);

    return header;
}

From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java

License:Open Source License

/**
 * Loads a dataset through a {@code DataSource} and marks its last attribute
 * as the class attribute.
 *
 * @param csvFilePath location handed to the {@code DataSource} constructor
 * @return the loaded instances, with the class index set to
 *         {@code numAttributes() - 1}
 * @throws Exception propagated from the {@code DataSource} calls
 */
private Instances getDataFromCSV(String csvFilePath) throws Exception {
    final Instances loaded = new DataSource(csvFilePath).getDataSet();
    final int lastAttribute = loaded.numAttributes() - 1;
    loaded.setClassIndex(lastAttribute);
    return loaded;
}

From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java

License:Open Source License

/**
 * @param cls/* ww  w .j a  va  2  s  .  c om*/
 * @param trainingData
 * @param testData
 * @param leba
 * @return [0] = pctCorrect, [1] = pctIncorrect
 * @throws Exception
 */
public double[] getValidation(Classifier cls, Instances trainingData, Instances testData, int leba)
        throws Exception {

    Instances testDataWithLEBA = new Instances(testData);

    for (int j = 0; j < leba; j++) {
        if (j < testDataWithLEBA.numAttributes() - 1) {
            for (int i = 0; i < testDataWithLEBA.numInstances(); i++) {
                testDataWithLEBA.instance(i).setMissing(j);
            }
        }
    }

    Evaluation eval;
    try {
        eval = new Evaluation(trainingData);
        logger.fine("Evaluating model with leba: " + leba);
        eval.evaluateModel(cls, testDataWithLEBA);

        double[] results = new double[2];
        results[0] = eval.pctCorrect() / 100;
        results[1] = eval.pctIncorrect() / 100;
        return results;
    } catch (Exception e) {
        logger.severe("Problems evaluating model for " + cls.getClass().getSimpleName());
        logger.severe(e.getMessage());
        e.printStackTrace();
        throw e;
    }
}

From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java

License:Open Source License

/**
 * Reads the dataset at the given path and declares its final attribute to be
 * the class attribute.
 *
 * @param csvFilePath location passed to the {@code DataSource} constructor
 * @return the dataset, with its class index pointing at the last attribute
 * @throws Exception propagated from the {@code DataSource} calls
 */
public static Instances getDataFromCSV(String csvFilePath) throws Exception {
    DataSource csvSource = new DataSource(csvFilePath);
    Instances dataset = csvSource.getDataSet();

    int classIndex = dataset.numAttributes() - 1;
    dataset.setClassIndex(classIndex);

    return dataset;
}

From source file:etc.aloe.cscw2013.FeatureGenerationImpl.java

License:Open Source License

/**
 * Assembles the feature specification (an ordered chain of filters) for a
 * copy of the given examples and reports the resulting feature count on
 * standard output.
 *
 * <p>If any step throws, the error message is printed to standard error and
 * the specification built so far is still returned.
 *
 * @param basicExamples examples to configure the filters on; a copy is used
 * @return the feature specification with every filter added before any failure
 */
@Override
public FeatureSpecification generateFeatures(ExampleSet basicExamples) {

    final ExampleSet examples = basicExamples.copy();
    final FeatureSpecification spec = new FeatureSpecification();

    System.out.print("Configuring features over " + examples.size() + " examples... ");

    try {
        // Filters are appended in this fixed order.
        spec.addFilter(getPronounsFilter(examples));
        spec.addFilter(getPunctuationFilter(examples));
        spec.addFilter(getSpecialWordsFilter(examples));
        spec.addFilter(getSpellingFilter(examples));

        spec.addFilter(getEmoticonsFilter(examples));
        spec.addFilter(getBagOfWordsFilter(examples));
        spec.addFilter(getRemoveIDFilter(examples));

        // Either derive participant features or drop the participant column.
        final boolean useParticipantFeatures = this.getParticipantFeatureCount() > 0;
        if (!useParticipantFeatures) {
            spec.addFilter(getRemoveParticipantFilter(examples));
        } else {
            spec.addFilter(getParticipantsFilter(examples));
        }

        final Instances output = spec.getOutputFormat();
        // One attribute is excluded from the reported feature count.
        System.out.println("generated " + (output.numAttributes() - 1) + " features.");
    } catch (Exception e) {
        System.err.println("Error generating features.");
        System.err.println("\t" + e.getMessage());
    }

    return spec;
}

From source file:etc.aloe.data.SegmentSet.java

License:Open Source License

/**
 * Turns this segment set into an ExampleSet that is ready for feature
 * extraction. Each example carries an id attribute, the message text, a
 * label attribute, the participants, and the basic rate features filled in
 * by {@code computeRateValues}.
 *
 * @return an ExampleSet holding one instance per segment
 */
public ExampleSet getBasicExamples() {
    final List<String> labelValues = Arrays.asList("false", "true");

    // Declaration order fixes the attribute indices; the label sits at index 2.
    final ArrayList<Attribute> schema = new ArrayList<Attribute>();
    schema.add(new Attribute(ExampleSet.ID_ATTR_NAME));
    schema.add(new Attribute(ExampleSet.MESSAGE_ATTR_NAME, (List<String>) null));
    schema.add(new Attribute(ExampleSet.LABEL_ATTR_NAME, labelValues));
    schema.add(new Attribute(ExampleSet.PARTICIPANT_ATTR_NAME, (List<String>) null));
    schema.add(new Attribute(DURATION_ATTR_NAME));
    schema.add(new Attribute(LENGTH_ATTR_NAME));
    schema.add(new Attribute(CPS_ATTR_NAME));
    schema.add(new Attribute(RATE_ATTR_NAME));

    final Instances dataset = new Instances("BasicExamples", schema, 0);
    dataset.setClassIndex(2);

    // Look the attributes up again through the dataset, by name.
    final Attribute id = dataset.attribute(ExampleSet.ID_ATTR_NAME);
    final Attribute message = dataset.attribute(ExampleSet.MESSAGE_ATTR_NAME);
    final Attribute label = dataset.attribute(ExampleSet.LABEL_ATTR_NAME);
    final Attribute participant = dataset.attribute(ExampleSet.PARTICIPANT_ATTR_NAME);
    final Attribute duration = dataset.attribute(DURATION_ATTR_NAME);
    final Attribute length = dataset.attribute(LENGTH_ATTR_NAME);
    final Attribute cps = dataset.attribute(CPS_ATTR_NAME);
    final Attribute rate = dataset.attribute(RATE_ATTR_NAME);

    for (int index = 0; index < size(); index++) {
        final Segment current = get(index);
        final Instance row = new DenseInstance(dataset.numAttributes());

        final String text = current.concatMessages();
        final String participants = current.concatParticipants();

        row.setValue(id, current.getId());
        row.setValue(message, text);
        row.setValue(participant, participants);

        // The label value is only set when the segment has a true label.
        if (current.hasTrueLabel()) {
            final String labelText;
            if (current.getTrueLabel()) {
                labelText = "true";
            } else {
                labelText = "false";
            }
            row.setValue(label, labelText);
        }

        computeRateValues(current, row, text, duration, length, cps, rate);

        dataset.add(row);
    }

    return new ExampleSet(dataset);
}

From source file:etc.aloe.filters.AbstractRegexFilter.java

License:Open Source License

/**
 * Derives the output format: the input attributes followed by one new
 * attribute per regex feature, plus a second "&lt;name&gt;_L" attribute per
 * feature when {@code countRegexLengths} is set.
 *
 * <p>Also records the index of the configured string attribute in
 * {@code stringAttributeIndex}.
 *
 * @param inputFormat the format of the incoming instances
 * @return an empty dataset describing the output attributes
 * @throws IllegalStateException if the string attribute name is not set, or
 *         if no attribute with that name exists in {@code inputFormat}
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (stringAttributeName == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    Instances outputFormat = new Instances(inputFormat, 0);

    // attribute(name) yields null for an unknown name; fail with a clear
    // message instead of a bare NullPointerException on index().
    Attribute stringAttr = inputFormat.attribute(stringAttributeName);
    if (stringAttr == null) {
        throw new IllegalStateException(
                "String attribute '" + stringAttributeName + "' not found in input format");
    }
    stringAttributeIndex = stringAttr.index();

    // Add the new columns. There is one for each regex feature.
    for (NamedRegex regexFeature : getRegexFeatures()) {
        String name = regexFeature.getName();
        outputFormat.insertAttributeAt(new Attribute(name), outputFormat.numAttributes());

        if (countRegexLengths) {
            // Companion column, placed directly after the feature's own column.
            outputFormat.insertAttributeAt(new Attribute(name + "_L"), outputFormat.numAttributes());
        }
    }

    return outputFormat;
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

/**
 * Derives the output format: the input attributes followed by one new
 * attribute per selected dictionary term, named {@code m_Prefix + term}.
 *
 * <p>Side effects: records the string attribute's index in
 * {@code stringAttributeIndex}, runs {@code determineDictionary}, and fills
 * {@code m_DocsCounts} with the per-term document counts it returns.
 *
 * @param inputFormat the format of the incoming instances; only used to
 *        locate the string attribute, after which the filter's stored input
 *        format ({@code getInputFormat()}) is used
 * @return an empty dataset describing the output attributes
 * @throws IllegalStateException if the string attribute name is not set, or
 *         if no attribute with that name exists in {@code inputFormat}
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (getStringAttribute() == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    // attribute(name) yields null for an unknown name; fail with a clear
    // message instead of a bare NullPointerException on index().
    Attribute stringAttr = inputFormat.attribute(getStringAttribute());
    if (stringAttr == null) {
        throw new IllegalStateException(
                "String attribute '" + getStringAttribute() + "' not found in input format");
    }
    stringAttributeIndex = stringAttr.index();

    inputFormat = getInputFormat();
    // This generates m_selectedTerms and m_DocsCounts
    int[] docsCountsByTermIdx = determineDictionary(inputFormat);

    // Initialize the output format to be just like the input
    Instances outputFormat = new Instances(inputFormat, 0);

    // Set up the map from attr index to document frequency
    m_DocsCounts = new int[m_selectedTerms.size()];
    // And add the new attributes
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        m_DocsCounts[i] = docsCountsByTermIdx[i];

        int attrIdx = outputFormat.numAttributes();
        outputFormat.insertAttributeAt(new Attribute(m_Prefix + m_selectedTerms.get(i)), attrIdx);
    }

    return outputFormat;
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

/**
 * Manual demo: builds a four-message test dataset, prints it, then applies a
 * StringToDictionaryVector filter loaded from "emoticons.txt" twice and
 * prints both results. Any exception is printed as a stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {

    // Test dataset schema: message (string), id (numeric), class (nominal 0/1).
    ArrayList<Attribute> schema = new ArrayList<Attribute>();
    schema.add(new Attribute("message", (ArrayList<String>) null));
    schema.add(new Attribute("id"));

    ArrayList<String> classValues = new ArrayList<String>();
    classValues.add("0");
    classValues.add("1");
    schema.add(new Attribute("class", classValues));

    Instances dataset = new Instances("test", schema, 0);
    dataset.setClassIndex(2);

    String[] messages = { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    int row = 0;
    while (row < messages.length) {
        Instance example = new DenseInstance(dataset.numAttributes());
        example.setValue(dataset.attribute(0), messages[row]);
        example.setValue(dataset.attribute(1), row);
        // Alternate the class label between "0" and "1".
        example.setValue(dataset.attribute(2), Integer.toString(row % 2));
        dataset.add(example);
        row++;
    }

    System.out.println("Before filter:");
    for (int k = 0; k < dataset.size(); k++) {
        System.out.println(dataset.instance(k).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));

        StringToDictionaryVector filter = new StringToDictionaryVector();
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(dataset);
        // The same configured filter is applied twice.
        Instances firstPass = Filter.useFilter(dataset, filter);
        Instances secondPass = Filter.useFilter(dataset, filter);

        System.out.println("\nFirst application:");
        System.out.println(firstPass.toString());

        System.out.println("\nSecond application:");
        System.out.println(secondPass.toString());

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:etc.aloe.filters.WordFeaturesExtractor.java

License:Open Source License

/**
 * Builds the output format: an empty copy of the input format with one
 * "uni_"-prefixed attribute appended per unigram, followed by one
 * "bi_"-prefixed attribute per bigram.
 *
 * @param inputFormat the format whose attributes are kept as the leading columns
 * @return an empty dataset describing the output attributes
 */
private Instances generateOutputFormat(Instances inputFormat) {
    final Instances result = new Instances(inputFormat, 0);

    // Unigram columns are appended first ...
    for (int u = 0; u < unigrams.size(); u++) {
        result.insertAttributeAt(new Attribute("uni_" + unigrams.get(u)), result.numAttributes());
    }

    // ... then the bigram columns.
    for (int b = 0; b < bigrams.size(); b++) {
        result.insertAttributeAt(new Attribute("bi_" + bigrams.get(b)), result.numAttributes());
    }

    return result;
}