Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usages of the weka.core.Instances constructor Instances(String name, ArrayList&lt;Attribute&gt; attInfo, int capacity).

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Document

Creates an empty set of instances.
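For orientation, here is a minimal, self-contained sketch of this constructor (assuming Weka 3.7 or later, where the attribute list is an ArrayList&lt;Attribute&gt;; several examples below use the older FastVector, which recent Weka releases keep as a deprecated ArrayList subclass). The relation name, attribute names, and values are illustrative only:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesConstructorSketch {
    public static void main(String[] args) {
        // Build the attribute information for the header
        ArrayList<String> sizeLabels = new ArrayList<>();
        sizeLabels.add("small");
        sizeLabels.add("large");

        ArrayList<Attribute> attInfo = new ArrayList<>();
        attInfo.add(new Attribute("length"));            // numeric
        attInfo.add(new Attribute("width"));             // numeric
        attInfo.add(new Attribute("size", sizeLabels));  // nominal

        // Create an empty dataset named "shapes" with an initial capacity of 10
        Instances data = new Instances("shapes", attInfo, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // Add a single instance so the dataset is no longer empty
        double[] vals = { 5.3, 2.1, data.attribute("size").indexOfValue("large") };
        data.add(new DenseInstance(1.0, vals));

        System.out.println(data);   // prints the dataset in ARFF format
    }
}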

Usage

From source file:com.entopix.maui.filters.MauiFilter.java

License:Open Source License

/**
 * Sets output format and converts pending input instances.
 */
@SuppressWarnings("unchecked")
private void convertPendingInstances() {

    if (debugMode) {
        log.info("--- Converting pending instances");
    }

    // Create output format for filter
    FastVector atts = new FastVector();
    for (int i = 1; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Candidate_name", (FastVector) null)); // 0
            atts.addElement(new Attribute("Candidate_original", (FastVector) null)); // 1

            atts.addElement(new Attribute("Term_frequency")); // 0
            atts.addElement(new Attribute("IDF")); // 1
            atts.addElement(new Attribute("TFxIDF")); // 2 
            atts.addElement(new Attribute("First_occurrence")); // 3
            atts.addElement(new Attribute("Last_occurrence")); // 4
            atts.addElement(new Attribute("Spread")); // 5
            atts.addElement(new Attribute("Domain_keyphraseness")); // 6
            atts.addElement(new Attribute("Length")); // 7
            atts.addElement(new Attribute("Generality")); // 8
            atts.addElement(new Attribute("Node_degree")); // 9
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 10
            atts.addElement(new Attribute("Wikipedia_inlinks")); // 11
            atts.addElement(new Attribute("Wikipedia_generality")); // 12

            atts.addElement(new Attribute("Probability")); // 16
            atts.addElement(new Attribute("Rank")); // 17

        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        } else {
            atts.addElement(getInputFormat().attribute(i));
        }
    }

    Instances outFormat = new Instances("mauidata", atts, 0);
    setOutputFormat(outFormat);

    // Convert pending input instances into output data
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        FastVector vector = convertInstance(current, true);
        Enumeration<Instance> en = vector.elements();
        while (en.hasMoreElements()) {
            Instance inst = (Instance) en.nextElement();
            push(inst);
        }
    }
}

From source file:com.entopix.maui.main.MauiModelBuilder.java

License:Open Source License

/**
 * Builds the model from the training data
 * @throws MauiFilterException
 */
public MauiFilter buildModel(List<MauiDocument> documents) throws MauiFilterException {

    log.info("-- Building the model... ");

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("document", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    mauiFilter = new MauiFilter();
    mauiFilter.setMaxPhraseLength(maxPhraseLength);
    mauiFilter.setMinPhraseLength(minPhraseLength);
    mauiFilter.setMinNumOccur(minNumOccur);
    mauiFilter.setStemmer(stemmer);
    mauiFilter.setDocumentLanguage(documentLanguage);
    mauiFilter.setVocabularyName(vocabularyName);
    mauiFilter.setVocabularyFormat(vocabularyFormat);
    mauiFilter.setStopwords(stopwords);
    mauiFilter.setVocabulary(vocabulary);

    if (classifier != null) {
        mauiFilter.setClassifier(classifier);
    }

    mauiFilter.setInputFormat(data);

    // set features configurations
    mauiFilter.setBasicFeatures(useBasicFeatures);
    mauiFilter.setKeyphrasenessFeature(useKeyphrasenessFeature);
    mauiFilter.setFrequencyFeatures(useFrequencyFeatures);
    mauiFilter.setPositionsFeatures(usePositionsFeatures);
    mauiFilter.setLengthFeature(useLengthFeature);
    mauiFilter.setThesaurusFeatures(useThesaurusFeatures);
    mauiFilter.setWikipediaFeatures(useWikipediaFeatures, wikiFeatures);

    mauiFilter.setClassifier(classifier);

    if (!vocabularyName.equals("none")) {
        loadVocabulary();
        mauiFilter.setVocabulary(vocabulary);
    }

    log.info("-- Adding documents as instances... ");

    for (MauiDocument document : documents) {

        double[] newInst = new double[3];
        newInst[0] = data.attribute(0).addStringValue(document.getFileName());

        // Adding the text and the topics for the document to the instance
        if (document.getTextContent().length() > 0) {
            newInst[1] = data.attribute(1).addStringValue(document.getTextContent());
        } else {
            newInst[1] = Instance.missingValue();
        }

        if (document.getTopicsString().length() > 0) {
            newInst[2] = data.attribute(2).addStringValue(document.getTopicsString());
        } else {
            newInst[2] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        mauiFilter.input(data.instance(0));
        data = data.stringFreeStructure();
    }
    log.info("-- Building the model... ");

    mauiFilter.batchFinished();

    while ((mauiFilter.output()) != null) {
    }

    return mauiFilter;

}

From source file:com.esda.util.StringToWordVector.java

License:Open Source License

/**
 * determines the dictionary.
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    // TreeMap dictionaryArr [] = new TreeMap[values];
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current
        // instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove
                // stopwords
                // (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    String[] wordsArr = word.split(" ");
                    StringBuilder stemmedStr = new StringBuilder();
                    for (String wordStr : wordsArr) {
                        if (!this.m_useStoplist || !stopwords.is(wordStr)) {
                            stemmedStr.append(m_Stemmer.stem(wordStr));
                            stemmedStr.append(" ");
                        }
                    }
                    /*for (int icounter = 0; icounter < wordsArr.length; icounter++) {
                       stemmedStr += m_Stemmer.stem(wordsArr[icounter]);
                       if (icounter + 1 < wordsArr.length)
                          stemmedStr += " ";
                    }*/

                    word = stemmedStr.toString().trim();

                    if (!(h.containsKey(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // updating the docCount for the words that have occurred in this
        // instance(document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the " + "dictionary.Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to
            // minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}

From source file:com.gamerecommendation.Weatherconditions.Clasificacion.java

public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";

    InputStream classModelStream;
    classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);
    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    // Here the instance set is created, containing all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);

    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        //System.out.println(i + " " + atributs.get(i)  + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    // Add the set to be evaluated.
    dataTest.add(setPrueba);

    // Make the prediction
    // Instance 0 is used because it is the only one in the dataset.
    double valorP = clasify.classifyInstance(dataTest.instance(0));
    //get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}

From source file:com.github.polarisation.kea.main.KEAKeyphraseExtractor.java

License:Open Source License

/**
 * Builds the model from the files
 */
public void extractKeyphrases(Hashtable stems) throws Exception {

    Vector stats = new Vector();

    // Check whether there is actually any data
    // = if there any files in the directory
    if (stems.size() == 0) {
        throw new Exception("Couldn't find any data!");
    }
    m_KEAFilter.setNumPhrases(m_numPhrases);
    m_KEAFilter.setVocabulary(m_vocabulary);
    m_KEAFilter.setVocabularyFormat(m_vocabularyFormat);
    m_KEAFilter.setDocumentLanguage(getDocumentLanguage());
    m_KEAFilter.setStemmer(m_Stemmer);
    m_KEAFilter.setStopwords(m_Stopwords);

    if (getVocabulary().equals("none")) {
        m_KEAFilter.m_NODEfeature = false;
    } else {
        m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords);
    }

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    atts.addElement(new Attribute("filename", (String) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    if (m_KEAFilter.m_Dictionary == null) {
        buildGlobalDictionaries(stems);
    }

    System.err.println("-- Extracting Keyphrases... ");
    // Extract keyphrases
    Enumeration elem = stems.keys();
    // Enumeration over all files in the directory (now in the hash):
    while (elem.hasMoreElements()) {
        String str = (String) elem.nextElement();

        double[] newInst = new double[2];
        try {
            File txt = new File(m_dirName + "/" + str + ".txt");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(txt), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(txt));
            }
            StringBuffer txtStr = new StringBuffer();
            int c;
            while ((c = is.read()) != -1) {
                txtStr.append((char) c);
            }

            newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());

        } catch (Exception e) {
            if (m_debug) {
                System.err.println("Can't read document " + str + ".txt");
            }
            newInst[0] = Instance.missingValue();
        }
        try {
            File key = new File(m_dirName + "/" + str + ".key");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(key), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(key));
            }
            StringBuffer keyStr = new StringBuffer();
            int c;

            // keyStr = keyphrases in the str.key file
            // KEA assumes that these keyphrases were assigned by the author
            // and evaluates extracted keyphrases against them

            while ((c = is.read()) != -1) {
                keyStr.append((char) c);
            }

            newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("No existing keyphrases for stem " + str + ".");
            }
            newInst[1] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        m_KEAFilter.input(data.instance(0));

        data = data.stringFreeStructure();
        if (m_debug) {
            System.err.println("-- Document: " + str);
        }
        Instance[] topRankedInstances = new Instance[m_numPhrases];
        Instance inst;

        // Iterating over all extracted keyphrases (inst)
        while ((inst = m_KEAFilter.output()) != null) {

            int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;

            if (index < m_numPhrases) {
                topRankedInstances[index] = inst;

            }
        }

        if (m_debug) {
            System.err.println("-- Keyphrases and feature values:");
        }
        FileOutputStream out = null;
        PrintWriter printer = null;
        File key = new File(m_dirName + "/" + str + ".key");
        if (!key.exists()) {
            out = new FileOutputStream(m_dirName + "/" + str + ".key");
            if (!m_encoding.equals("default")) {
                printer = new PrintWriter(new OutputStreamWriter(out, m_encoding));

            } else {
                printer = new PrintWriter(out);
            }
        }
        double numExtracted = 0, numCorrect = 0;

        for (int i = 0; i < m_numPhrases; i++) {
            if (topRankedInstances[i] != null) {
                if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
                    numExtracted += 1.0;
                }
                if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
                    numCorrect += 1.0;
                }
                if (printer != null) {
                    printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex()));

                    if (m_AdditionalInfo) {
                        printer.print("\t");
                        printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex()));
                        printer.print("\t");
                        printer.print(Utils.doubleToString(
                                topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4));
                    }
                    printer.println();
                }
                if (m_debug) {
                    System.err.println(topRankedInstances[i]);
                }
            }
        }
        if (numExtracted > 0) {
            if (m_debug) {
                System.err.println("-- " + numCorrect + " correct");
            }
            stats.addElement(new Double(numCorrect));
        }
        if (printer != null) {
            printer.flush();
            printer.close();
            out.close();
        }
    }
    double[] st = new double[stats.size()];
    for (int i = 0; i < stats.size(); i++) {
        st[i] = ((Double) stats.elementAt(i)).doubleValue();
    }
    double avg = Utils.mean(st);
    double stdDev = Math.sqrt(Utils.variance(st));

    System.err.println("Avg. number of matching keyphrases compared to existing ones : "
            + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2));
    System.err.println("Based on " + stats.size() + " documents");
    // m_KEAFilter.batchFinished();
}

From source file:com.hack23.cia.service.impl.action.user.wordcount.WordCounterImpl.java

License:Apache License

@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData,
        final int maxResult) {

    final String html = documentContentData.getContent();

    final Attribute input = new Attribute("html", (ArrayList<String>) null);

    final ArrayList<Attribute> inputVec = new ArrayList<>();
    inputVec.add(input);

    final Instances htmlInst = new Instances("html", inputVec, 1);

    htmlInst.add(new DenseInstance(1));
    htmlInst.instance(0).setValue(0, html);

    final StopwordsHandler StopwordsHandler = new StopwordsHandler() {

        @Override
        public boolean isStopword(final String word) {

            return word.length() < 5;
        }
    };

    final NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(1);
    tokenizer.setNGramMaxSize(1);
    tokenizer.setDelimiters(" \r\n\t.,;:'\"()?!'");

    final StringToWordVector filter = new StringToWordVector();
    filter.setTokenizer(tokenizer);
    filter.setStopwordsHandler(StopwordsHandler);
    filter.setLowerCaseTokens(true);
    filter.setOutputWordCounts(true);
    filter.setWordsToKeep(maxResult);

    final Map<String, Integer> result = new HashMap<>();

    try {
        filter.setInputFormat(htmlInst);
        final Instances dataFiltered = Filter.useFilter(htmlInst, filter);

        final Instance last = dataFiltered.lastInstance();

        final int numAttributes = last.numAttributes();

        for (int i = 0; i < numAttributes; i++) {
            result.put(last.attribute(i).name(), Integer.valueOf(last.toString(i)));
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }

    return result;
}

From source file:com.ivanrf.smsspam.SpamClassifier.java

License:Apache License

public static String classify(String model, String text, JTextArea log) {
    FilteredClassifier classifier = loadModel(model, log);

    //Create the instance
    ArrayList<String> fvNominalVal = new ArrayList<String>();
    fvNominalVal.add("ham");
    fvNominalVal.add("spam");

    Attribute attribute1 = new Attribute("spam_class", fvNominalVal);
    Attribute attribute2 = new Attribute("text", (List<String>) null);
    ArrayList<Attribute> fvWekaAttributes = new ArrayList<Attribute>();
    fvWekaAttributes.add(attribute1);
    fvWekaAttributes.add(attribute2);

    Instances instances = new Instances("Test relation", fvWekaAttributes, 1);
    instances.setClassIndex(0);

    DenseInstance instance = new DenseInstance(2);
    instance.setValue(attribute2, text);
    instances.add(instance);

    publishEstado("=== Instance created ===", log);
    publishEstado(instances.toString(), log);

    //Classify the instance
    try {
        publishEstado("=== Classifying instance ===", log);

        double pred = classifier.classifyInstance(instances.instance(0));

        publishEstado("=== Instance classified  ===", log);

        String classPredicted = instances.classAttribute().value((int) pred);
        publishEstado("Class predicted: " + classPredicted, log);

        return classPredicted;
    } catch (Exception e) {
        publishEstado("Error found when classifying the text", log);
        return null;
    }
}

From source file:com.jgaap.util.Instance.java

License:Open Source License

/**
 * Main method for testing this class.
 * 
 * @param options the commandline options - ignored
 */
//@ requires options != null;
public static void main(String[] options) {

    try {

        // Create numeric attributes "length" and "weight"
        Attribute length = new Attribute("length");
        Attribute weight = new Attribute("weight");

        // Create vector to hold nominal values "first", "second", "third" 
        FastVector my_nominal_values = new FastVector(3);
        my_nominal_values.addElement("first");
        my_nominal_values.addElement("second");
        my_nominal_values.addElement("third");

        // Create nominal attribute "position" 
        Attribute position = new Attribute("position", my_nominal_values);

        // Create vector of the above attributes 
        FastVector attributes = new FastVector(3);
        attributes.addElement(length);
        attributes.addElement(weight);
        attributes.addElement(position);

        // Create the empty dataset "race" with above attributes
        Instances race = new Instances("race", attributes, 0);

        // Make position the class attribute
        race.setClassIndex(position.index());

        // Create empty instance with three attribute values
        Instance inst = new Instance(3);

        // Set instance's values for the attributes "length", "weight", and "position"
        inst.setValue(length, 5.3);
        inst.setValue(weight, 300);
        inst.setValue(position, "first");

        // Set instance's dataset to be the dataset "race"
        inst.setDataset(race);

        // Print the instance
        System.out.println("The instance: " + inst);

        // Print the first attribute
        System.out.println("First attribute: " + inst.attribute(0));

        // Print the class attribute
        System.out.println("Class attribute: " + inst.classAttribute());

        // Print the class index
        System.out.println("Class index: " + inst.classIndex());

        // Say if class is missing
        System.out.println("Class is missing: " + inst.classIsMissing());

        // Print the instance's class value in internal format
        System.out.println("Class value (internal format): " + inst.classValue());

        // Print a shallow copy of this instance
        Instance copy = (Instance) inst.copy();
        System.out.println("Shallow copy: " + copy);

        // Set dataset for shallow copy
        copy.setDataset(inst.dataset());
        System.out.println("Shallow copy with dataset set: " + copy);

        // Unset dataset for copy, delete first attribute, and insert it again
        copy.setDataset(null);
        copy.deleteAttributeAt(0);
        copy.insertAttributeAt(0);
        copy.setDataset(inst.dataset());
        System.out.println("Copy with first attribute deleted and inserted: " + copy);

        // Enumerate attributes (leaving out the class attribute)
        System.out.println("Enumerating attributes (leaving out class):");
        Enumeration enu = inst.enumerateAttributes();
        while (enu.hasMoreElements()) {
            Attribute att = (Attribute) enu.nextElement();
            System.out.println(att);
        }

        // Headers are equivalent?
        System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

        // Test for missing values
        System.out.println("Length of copy missing: " + copy.isMissing(length));
        System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
        System.out.println("Length of copy missing: " + Instance.isMissingValue(copy.value(length)));
        System.out.println("Missing value coded as: " + Instance.missingValue());

        // Prints number of attributes and classes
        System.out.println("Number of attributes: " + copy.numAttributes());
        System.out.println("Number of classes: " + copy.numClasses());

        // Replace missing values
        double[] meansAndModes = { 2, 3, 0 };
        copy.replaceMissingValues(meansAndModes);
        System.out.println("Copy with missing value replaced: " + copy);

        // Setting and getting values and weights
        copy.setClassMissing();
        System.out.println("Copy with missing class: " + copy);
        copy.setClassValue(0);
        System.out.println("Copy with class value set to first value: " + copy);
        copy.setClassValue("third");
        System.out.println("Copy with class value set to \"third\": " + copy);
        copy.setMissing(1);
        System.out.println("Copy with second attribute set to be missing: " + copy);
        copy.setMissing(length);
        System.out.println("Copy with length set to be missing: " + copy);
        copy.setValue(0, 0);
        System.out.println("Copy with first attribute set to 0: " + copy);
        copy.setValue(weight, 1);
        System.out.println("Copy with weight attribute set to 1: " + copy);
        copy.setValue(position, "second");
        System.out.println("Copy with position set to \"second\": " + copy);
        copy.setValue(2, "first");
        System.out.println("Copy with last attribute set to \"first\": " + copy);
        System.out.println("Current weight of instance copy: " + copy.weight());
        copy.setWeight(2);
        System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
        System.out.println("Last value of copy: " + copy.toString(2));
        System.out.println("Value of position for copy: " + copy.toString(position));
        System.out.println("Last value of copy (internal format): " + copy.value(2));
        System.out.println("Value of position for copy (internal format): " + copy.value(position));
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.kdcloud.lib.domain.DataSpecification.java

License:Open Source License

public Instances newInstances(String relationalName) {
    return new Instances(relationalName, getAttrInfo(), 1000);
}

From source file:com.kdcloud.lib.domain.DataSpecification.java

License:Open Source License

public boolean matchingSpecification(Instances instances) {
    return new Instances("", getAttrInfo(), 0).equalHeaders(instances);
}