Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usages of the weka.core.Instances constructor Instances(String name, ArrayList&lt;Attribute&gt; attInfo, int capacity).

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Document

Creates an empty set of instances.
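For orientation, here is a minimal, self-contained sketch of this constructor (assuming Weka 3.7 or later, where the attribute list is an ArrayList&lt;Attribute&gt;; several examples below use the older FastVector, which recent Weka releases keep as a deprecated ArrayList subclass). The relation name, attribute names, and values are illustrative only:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesConstructorSketch {
    public static void main(String[] args) {
        // Build the attribute information for the header
        ArrayList<String> sizeLabels = new ArrayList<>();
        sizeLabels.add("small");
        sizeLabels.add("large");

        ArrayList<Attribute> attInfo = new ArrayList<>();
        attInfo.add(new Attribute("length"));            // numeric
        attInfo.add(new Attribute("width"));             // numeric
        attInfo.add(new Attribute("size", sizeLabels));  // nominal

        // Create an empty dataset named "shapes" with an initial capacity of 10
        Instances data = new Instances("shapes", attInfo, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // Add a single instance so the dataset is no longer empty
        double[] vals = { 5.3, 2.1, data.attribute("size").indexOfValue("large") };
        data.add(new DenseInstance(1.0, vals));

        System.out.println(data);   // prints the dataset in ARFF format
    }
}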

Usage

From source file:com.entopix.maui.filters.MauiFilter.java

License:Open Source License

/**
 * Sets output format and converts pending input instances.
 */
@SuppressWarnings("unchecked")
private void convertPendingInstances() {

    if (debugMode) {
        log.info("--- Converting pending instances");
    }

    // Create output format for filter
    FastVector atts = new FastVector();
    for (int i = 1; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Candidate_name", (FastVector) null)); // 0
            atts.addElement(new Attribute("Candidate_original", (FastVector) null)); // 1

            atts.addElement(new Attribute("Term_frequency")); // 0
            atts.addElement(new Attribute("IDF")); // 1
            atts.addElement(new Attribute("TFxIDF")); // 2 
            atts.addElement(new Attribute("First_occurrence")); // 3
            atts.addElement(new Attribute("Last_occurrence")); // 4
            atts.addElement(new Attribute("Spread")); // 5
            atts.addElement(new Attribute("Domain_keyphraseness")); // 6
            atts.addElement(new Attribute("Length")); // 7
            atts.addElement(new Attribute("Generality")); // 8
            atts.addElement(new Attribute("Node_degree")); // 9
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 10
            atts.addElement(new Attribute("Wikipedia_inlinks")); // 11
            atts.addElement(new Attribute("Wikipedia_generality")); // 12

            atts.addElement(new Attribute("Probability")); // 16
            atts.addElement(new Attribute("Rank")); // 17

        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        } else {
            atts.addElement(getInputFormat().attribute(i));
        }
    }

    Instances outFormat = new Instances("mauidata", atts, 0);
    setOutputFormat(outFormat);

    // Convert pending input instances into output data
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        FastVector vector = convertInstance(current, true);
        Enumeration<Instance> en = vector.elements();
        while (en.hasMoreElements()) {
            Instance inst = (Instance) en.nextElement();
            push(inst);
        }
    }
}

From source file:com.entopix.maui.main.MauiModelBuilder.java

License:Open Source License

/**
 * Builds the model from the training data
 * @throws MauiFilterException
 */
public MauiFilter buildModel(List<MauiDocument> documents) throws MauiFilterException {

    log.info("-- Building the model... ");

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("document", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    mauiFilter = new MauiFilter();
    mauiFilter.setMaxPhraseLength(maxPhraseLength);
    mauiFilter.setMinPhraseLength(minPhraseLength);
    mauiFilter.setMinNumOccur(minNumOccur);
    mauiFilter.setStemmer(stemmer);
    mauiFilter.setDocumentLanguage(documentLanguage);
    mauiFilter.setVocabularyName(vocabularyName);
    mauiFilter.setVocabularyFormat(vocabularyFormat);
    mauiFilter.setStopwords(stopwords);
    mauiFilter.setVocabulary(vocabulary);

    if (classifier != null) {
        mauiFilter.setClassifier(classifier);
    }

    mauiFilter.setInputFormat(data);

    // set features configurations
    mauiFilter.setBasicFeatures(useBasicFeatures);
    mauiFilter.setKeyphrasenessFeature(useKeyphrasenessFeature);
    mauiFilter.setFrequencyFeatures(useFrequencyFeatures);
    mauiFilter.setPositionsFeatures(usePositionsFeatures);
    mauiFilter.setLengthFeature(useLengthFeature);
    mauiFilter.setThesaurusFeatures(useThesaurusFeatures);
    mauiFilter.setWikipediaFeatures(useWikipediaFeatures, wikiFeatures);

    mauiFilter.setClassifier(classifier);

    if (!vocabularyName.equals("none")) {
        loadVocabulary();
        mauiFilter.setVocabulary(vocabulary);
    }

    log.info("-- Adding documents as instances... ");

    for (MauiDocument document : documents) {

        double[] newInst = new double[3];
        newInst[0] = data.attribute(0).addStringValue(document.getFileName());

        // Adding the text and the topics for the document to the instance
        if (document.getTextContent().length() > 0) {
            newInst[1] = data.attribute(1).addStringValue(document.getTextContent());
        } else {
            newInst[1] = Instance.missingValue();
        }

        if (document.getTopicsString().length() > 0) {
            newInst[2] = data.attribute(2).addStringValue(document.getTopicsString());
        } else {
            newInst[2] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        mauiFilter.input(data.instance(0));
        data = data.stringFreeStructure();
    }
    log.info("-- Building the model... ");

    mauiFilter.batchFinished();

    while ((mauiFilter.output()) != null) {
    }

    return mauiFilter;

}

From source file:com.esda.util.StringToWordVector.java

License:Open Source License

/**
 * determines the dictionary.
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    // TreeMap dictionaryArr [] = new TreeMap[values];
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current
        // instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove
                // stopwords
                // (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    String[] wordsArr = word.split(" ");
                    StringBuilder stemmedStr = new StringBuilder();
                    for (String wordStr : wordsArr) {
                        if (!this.m_useStoplist || !stopwords.is(wordStr)) {
                            stemmedStr.append(m_Stemmer.stem(wordStr));
                            stemmedStr.append(" ");
                        }
                    }
                    /*for (int icounter = 0; icounter < wordsArr.length; icounter++) {
                       stemmedStr += m_Stemmer.stem(wordsArr[icounter]);
                       if (icounter + 1 < wordsArr.length)
                          stemmedStr += " ";
                    }*/

                    word = stemmedStr.toString().trim();

                    if (!(h.containsKey(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // updating the docCount for the words that have occurred in this
        // instance(document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the " + "dictionary.Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to
            // minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}

From source file:com.gamerecommendation.Weatherconditions.Clasificacion.java

public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";

    InputStream classModelStream;
    classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);
    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    // Here the instance set is created, containing all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);

    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        //System.out.println(i + " " + atributs.get(i)  + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    // Add the set to be evaluated.
    dataTest.add(setPrueba);

    // Make the prediction
    // Instance 0 is used because it is the only one in the dataset.
    double valorP = clasify.classifyInstance(dataTest.instance(0));
    //get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}

From source file:com.github.polarisation.kea.main.KEAKeyphraseExtractor.java

License:Open Source License

/**
 * Builds the model from the files
 */
public void extractKeyphrases(Hashtable stems) throws Exception {

    Vector stats = new Vector();

    // Check whether there is actually any data
    // = if there any files in the directory
    if (stems.size() == 0) {
        throw new Exception("Couldn't find any data!");
    }
    m_KEAFilter.setNumPhrases(m_numPhrases);
    m_KEAFilter.setVocabulary(m_vocabulary);
    m_KEAFilter.setVocabularyFormat(m_vocabularyFormat);
    m_KEAFilter.setDocumentLanguage(getDocumentLanguage());
    m_KEAFilter.setStemmer(m_Stemmer);
    m_KEAFilter.setStopwords(m_Stopwords);

    if (getVocabulary().equals("none")) {
        m_KEAFilter.m_NODEfeature = false;
    } else {
        m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords);
    }

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    atts.addElement(new Attribute("filename", (String) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    if (m_KEAFilter.m_Dictionary == null) {
        buildGlobalDictionaries(stems);
    }

    System.err.println("-- Extracting Keyphrases... ");
    // Extract keyphrases
    Enumeration elem = stems.keys();
    // Enumeration over all files in the directory (now in the hash):
    while (elem.hasMoreElements()) {
        String str = (String) elem.nextElement();

        double[] newInst = new double[2];
        try {
            File txt = new File(m_dirName + "/" + str + ".txt");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(txt), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(txt));
            }
            StringBuffer txtStr = new StringBuffer();
            int c;
            while ((c = is.read()) != -1) {
                txtStr.append((char) c);
            }

            newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());

        } catch (Exception e) {
            if (m_debug) {
                System.err.println("Can't read document " + str + ".txt");
            }
            newInst[0] = Instance.missingValue();
        }
        try {
            File key = new File(m_dirName + "/" + str + ".key");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(key), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(key));
            }
            StringBuffer keyStr = new StringBuffer();
            int c;

            // keyStr = keyphrases in the str.key file
            // KEA assumes that these keyphrases were assigned by the author
            // and evaluates extracted keyphrases against them

            while ((c = is.read()) != -1) {
                keyStr.append((char) c);
            }

            newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("No existing keyphrases for stem " + str + ".");
            }
            newInst[1] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        m_KEAFilter.input(data.instance(0));

        data = data.stringFreeStructure();
        if (m_debug) {
            System.err.println("-- Document: " + str);
        }
        Instance[] topRankedInstances = new Instance[m_numPhrases];
        Instance inst;

        // Iterating over all extracted keyphrases (inst)
        while ((inst = m_KEAFilter.output()) != null) {

            int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;

            if (index < m_numPhrases) {
                topRankedInstances[index] = inst;

            }
        }

        if (m_debug) {
            System.err.println("-- Keyphrases and feature values:");
        }
        FileOutputStream out = null;
        PrintWriter printer = null;
        File key = new File(m_dirName + "/" + str + ".key");
        if (!key.exists()) {
            out = new FileOutputStream(m_dirName + "/" + str + ".key");
            if (!m_encoding.equals("default")) {
                printer = new PrintWriter(new OutputStreamWriter(out, m_encoding));

            } else {
                printer = new PrintWriter(out);
            }
        }
        double numExtracted = 0, numCorrect = 0;

        for (int i = 0; i < m_numPhrases; i++) {
            if (topRankedInstances[i] != null) {
                if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
                    numExtracted += 1.0;
                }
                if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
                    numCorrect += 1.0;
                }
                if (printer != null) {
                    printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex()));

                    if (m_AdditionalInfo) {
                        printer.print("\t");
                        printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex()));
                        printer.print("\t");
                        printer.print(Utils.doubleToString(
                                topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4));
                    }
                    printer.println();
                }
                if (m_debug) {
                    System.err.println(topRankedInstances[i]);
                }
            }
        }
        if (numExtracted > 0) {
            if (m_debug) {
                System.err.println("-- " + numCorrect + " correct");
            }
            stats.addElement(new Double(numCorrect));
        }
        if (printer != null) {
            printer.flush();
            printer.close();
            out.close();
        }
    }
    double[] st = new double[stats.size()];
    for (int i = 0; i < stats.size(); i++) {
        st[i] = ((Double) stats.elementAt(i)).doubleValue();
    }
    double avg = Utils.mean(st);
    double stdDev = Math.sqrt(Utils.variance(st));

    System.err.println("Avg. number of matching keyphrases compared to existing ones : "
            + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2));
    System.err.println("Based on " + stats.size() + " documents");
    // m_KEAFilter.batchFinished();
}

From source file:com.hack23.cia.service.impl.action.user.wordcount.WordCounterImpl.java

License:Apache License

@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData,
        final int maxResult) {

    final String html = documentContentData.getContent();

    final Attribute input = new Attribute("html", (ArrayList<String>) null);

    final ArrayList<Attribute> inputVec = new ArrayList<>();
    inputVec.add(input);

    final Instances htmlInst = new Instances("html", inputVec, 1);

    htmlInst.add(new DenseInstance(1));
    htmlInst.instance(0).setValue(0, html);

    final StopwordsHandler StopwordsHandler = new StopwordsHandler() {

        @Override
        public boolean isStopword(final String word) {

            return word.length() < 5;
        }
    };

    final NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(1);
    tokenizer.setNGramMaxSize(1);
    tokenizer.setDelimiters(" \r\n\t.,;:'\"()?!'");

    final StringToWordVector filter = new StringToWordVector();
    filter.setTokenizer(tokenizer);
    filter.setStopwordsHandler(StopwordsHandler);
    filter.setLowerCaseTokens(true);
    filter.setOutputWordCounts(true);
    filter.setWordsToKeep(maxResult);

    final Map<String, Integer> result = new HashMap<>();

    try {
        filter.setInputFormat(htmlInst);
        final Instances dataFiltered = Filter.useFilter(htmlInst, filter);

        final Instance last = dataFiltered.lastInstance();

        final int numAttributes = last.numAttributes();

        for (int i = 0; i < numAttributes; i++) {
            result.put(last.attribute(i).name(), Integer.valueOf(last.toString(i)));
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }

    return result;
}

From source file:com.ivanrf.smsspam.SpamClassifier.java

License:Apache License

public static String classify(String model, String text, JTextArea log) {
    FilteredClassifier classifier = loadModel(model, log);

    //Create the instance
    ArrayList<String> fvNominalVal = new ArrayList<String>();
    fvNominalVal.add("ham");
    fvNominalVal.add("spam");

    Attribute attribute1 = new Attribute("spam_class", fvNominalVal);
    Attribute attribute2 = new Attribute("text", (List<String>) null);
    ArrayList<Attribute> fvWekaAttributes = new ArrayList<Attribute>();
    fvWekaAttributes.add(attribute1);
    fvWekaAttributes.add(attribute2);

    Instances instances = new Instances("Test relation", fvWekaAttributes, 1);
    instances.setClassIndex(0);

    DenseInstance instance = new DenseInstance(2);
    instance.setValue(attribute2, text);
    instances.add(instance);

    publishEstado("=== Instance created ===", log);
    publishEstado(instances.toString(), log);

    //Classify the instance
    try {
        publishEstado("=== Classifying instance ===", log);

        double pred = classifier.classifyInstance(instances.instance(0));

        publishEstado("=== Instance classified  ===", log);

        String classPredicted = instances.classAttribute().value((int) pred);
        publishEstado("Class predicted: " + classPredicted, log);

        return classPredicted;
    } catch (Exception e) {
        publishEstado("Error found when classifying the text", log);
        return null;
    }
}

From source file:com.jgaap.util.Instance.java

License:Open Source License

/**
 * Main method for testing this class.
 * 
 * @param options the commandline options - ignored
 */
//@ requires options != null;
public static void main(String[] options) {

    try {

        // Create numeric attributes "length" and "weight"
        Attribute length = new Attribute("length");
        Attribute weight = new Attribute("weight");

        // Create vector to hold nominal values "first", "second", "third" 
        FastVector my_nominal_values = new FastVector(3);
        my_nominal_values.addElement("first");
        my_nominal_values.addElement("second");
        my_nominal_values.addElement("third");

        // Create nominal attribute "position" 
        Attribute position = new Attribute("position", my_nominal_values);

        // Create vector of the above attributes 
        FastVector attributes = new FastVector(3);
        attributes.addElement(length);
        attributes.addElement(weight);
        attributes.addElement(position);

        // Create the empty dataset "race" with above attributes
        Instances race = new Instances("race", attributes, 0);

        // Make position the class attribute
        race.setClassIndex(position.index());

        // Create empty instance with three attribute values
        Instance inst = new Instance(3);

        // Set instance's values for the attributes "length", "weight", and "position"
        inst.setValue(length, 5.3);
        inst.setValue(weight, 300);
        inst.setValue(position, "first");

        // Set instance's dataset to be the dataset "race"
        inst.setDataset(race);

        // Print the instance
        System.out.println("The instance: " + inst);

        // Print the first attribute
        System.out.println("First attribute: " + inst.attribute(0));

        // Print the class attribute
        System.out.println("Class attribute: " + inst.classAttribute());

        // Print the class index
        System.out.println("Class index: " + inst.classIndex());

        // Say if class is missing
        System.out.println("Class is missing: " + inst.classIsMissing());

        // Print the instance's class value in internal format
        System.out.println("Class value (internal format): " + inst.classValue());

        // Print a shallow copy of this instance
        Instance copy = (Instance) inst.copy();
        System.out.println("Shallow copy: " + copy);

        // Set dataset for shallow copy
        copy.setDataset(inst.dataset());
        System.out.println("Shallow copy with dataset set: " + copy);

        // Unset dataset for copy, delete first attribute, and insert it again
        copy.setDataset(null);
        copy.deleteAttributeAt(0);
        copy.insertAttributeAt(0);
        copy.setDataset(inst.dataset());
        System.out.println("Copy with first attribute deleted and inserted: " + copy);

        // Enumerate attributes (leaving out the class attribute)
        System.out.println("Enumerating attributes (leaving out class):");
        Enumeration enu = inst.enumerateAttributes();
        while (enu.hasMoreElements()) {
            Attribute att = (Attribute) enu.nextElement();
            System.out.println(att);
        }

        // Headers are equivalent?
        System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

        // Test for missing values
        System.out.println("Length of copy missing: " + copy.isMissing(length));
        System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
        System.out.println("Length of copy missing: " + Instance.isMissingValue(copy.value(length)));
        System.out.println("Missing value coded as: " + Instance.missingValue());

        // Prints number of attributes and classes
        System.out.println("Number of attributes: " + copy.numAttributes());
        System.out.println("Number of classes: " + copy.numClasses());

        // Replace missing values
        double[] meansAndModes = { 2, 3, 0 };
        copy.replaceMissingValues(meansAndModes);
        System.out.println("Copy with missing value replaced: " + copy);

        // Setting and getting values and weights
        copy.setClassMissing();
        System.out.println("Copy with missing class: " + copy);
        copy.setClassValue(0);
        System.out.println("Copy with class value set to first value: " + copy);
        copy.setClassValue("third");
        System.out.println("Copy with class value set to \"third\": " + copy);
        copy.setMissing(1);
        System.out.println("Copy with second attribute set to be missing: " + copy);
        copy.setMissing(length);
        System.out.println("Copy with length set to be missing: " + copy);
        copy.setValue(0, 0);
        System.out.println("Copy with first attribute set to 0: " + copy);
        copy.setValue(weight, 1);
        System.out.println("Copy with weight attribute set to 1: " + copy);
        copy.setValue(position, "second");
        System.out.println("Copy with position set to \"second\": " + copy);
        copy.setValue(2, "first");
        System.out.println("Copy with last attribute set to \"first\": " + copy);
        System.out.println("Current weight of instance copy: " + copy.weight());
        copy.setWeight(2);
        System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
        System.out.println("Last value of copy: " + copy.toString(2));
        System.out.println("Value of position for copy: " + copy.toString(position));
        System.out.println("Last value of copy (internal format): " + copy.value(2));
        System.out.println("Value of position for copy (internal format): " + copy.value(position));
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.kdcloud.lib.domain.DataSpecification.java

License:Open Source License

public Instances newInstances(String relationalName) {
    return new Instances(relationalName, getAttrInfo(), 1000);
}

From source file:com.kdcloud.lib.domain.DataSpecification.java

License:Open Source License

public boolean matchingSpecification(Instances instances) {
    return new Instances("", getAttrInfo(), 0).equalHeaders(instances);
}