Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

This page presents usage examples for weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Document

Sets the value of a nominal or string attribute to the given value.
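
A minimal, self-contained sketch of the call is shown below. It is written against the Weka 3.6 API (FastVector and a concrete Instance class) used throughout this page; the relation, attribute, and value names are illustrative.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class SetValueDemo {
    public static void main(String[] args) {
        // A string attribute and a two-valued nominal attribute.
        Attribute text = new Attribute("text", (FastVector) null);
        FastVector labels = new FastVector(2);
        labels.addElement("yes");
        labels.addElement("no");
        Attribute cls = new Attribute("class", labels);

        FastVector atts = new FastVector(2);
        atts.addElement(text);
        atts.addElement(cls);
        // Creating the Instances header assigns each attribute its index.
        Instances data = new Instances("demo", atts, 0);

        Instance inst = new Instance(2);
        inst.setValue(text, "some text"); // string attribute: the value is added to the attribute
        inst.setValue(cls, "yes");        // nominal attribute: the value must be a declared label
        inst.setDataset(data);
        data.add(inst);
    }
}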

Usage

From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java

License:Open Source License

private void train() throws Exception {
    models = new M5P[ModelNum];
    for (int i = 0; i < ModelNum; i++) {
        models[i] = buildModel(labeledInstances, M[i]);
    }

    for (int i = 0; i < this.comtIterations; i++) {
        ArrayList<Instance>[] InstancePiSet = new ArrayList[ModelNum];
        for (int j = 0; j < ModelNum; j++)
            InstancePiSet[j] = new ArrayList<Instance>();

        for (int m = 0; m < ModelNum; m++) {
            double maxDelta = 0;
            Instance maxDeltaXY = null;
            Enumeration<Instance> enu = this.unlabeledInstances.enumerateInstances();

            while (enu.hasMoreElements()) {
                Instance ulIns = enu.nextElement();
                Instances omega = getSiblings(models[m], ulIns);
                double y = models[m].classifyInstance(ulIns);
                if (indexOfClass == -1)
                    indexOfClass = labeledInstances.classIndex();
                ulIns.setValue(indexOfClass, y);

                Instances instancesPi = new Instances(models[m].getM5RootNode().zyqGetTrainingSet());
                instancesPi.add(ulIns);
                M5P modelPi = buildModel(instancesPi, M[m]);
                double delta = computeOmegaDelta(models[m], modelPi, omega);
                if (maxDelta < delta) {
                    maxDelta = delta;
                    maxDeltaXY = ulIns;
                }
            }

            //keep the unlabeled instance that yielded the largest improvement, if any
            if (maxDelta > 0) {
                InstancePiSet[m].add(maxDeltaXY);
                this.unlabeledInstances.delete(this.unlabeledInstances.indexOf(maxDeltaXY));
            }
        } //end of the loop over both models

        boolean toExit = true;
        for (int m = 0; m < ModelNum; m++) {
            if (InstancePiSet[m].size() > 0) {
                toExit = false;
                break;
            }
        }

        if (toExit)
            break;
        else {
            //update the models
            int toGen = 0;
            for (int m = 0; m < ModelNum; m++) {
                Instances set = models[m].getM5RootNode().zyqGetTrainingSet();
                toGen += InstancePiSet[m].size();
                for (Instance ins : InstancePiSet[m])
                    set.add(ins);

                models[m] = buildModel(set, M[m]);
            }

            //Replenish pool U' to size p
            Instances toAdd = retrieveMore(toGen);
            unlabeledInstances.addAll(toAdd);
        } //we will go to another round of iteration
    } //iterate for a number of rounds or break out on empty InstancesPiSets

    //now we have the model as y = 0.5*sum(models[m].predict(x))
}
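
The closing comment above describes how the trained pair of models is meant to be used at prediction time. A hypothetical helper making that averaging explicit (not part of the original class; models and ModelNum are the fields used in train()) could look like:

private double predict(Instance x) throws Exception {
    // Average the predictions of the co-trained regressors.
    double sum = 0;
    for (int m = 0; m < ModelNum; m++)
        sum += models[m].classifyInstance(x);
    return sum / ModelNum; // with ModelNum == 2 this is 0.5 * sum
}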

From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java

License:Apache License

/**
 * Method that converts a text message into an instance.
 *
 * @param text the message content to convert
 * @param data the header information
 * @return the generated Instance
 */
private Instance makeInstance(String text, Instances data) {
    Instance instance = new Instance(2);
    Attribute messageAtt = data.attribute("content");
    instance.setValue(messageAtt, messageAtt.addStringValue(text));
    instance.setDataset(data);
    return instance;
}
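
For context, a sketch of the header Instances that makeInstance expects as its data argument follows, assuming the same Weka 3.6 FastVector API used elsewhere on this page; the relation name and class labels are illustrative.

FastVector atts = new FastVector(2);
atts.addElement(new Attribute("content", (FastVector) null)); // string attribute
FastVector classVals = new FastVector(2);
classVals.addElement("positive");
classVals.addElement("negative");
atts.addElement(new Attribute("class", classVals));
Instances data = new Instances("tweets", atts, 0);
data.setClassIndex(data.numAttributes() - 1);

Instance inst = makeInstance("great game today", data); // called from within the class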

From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetFeatureExtractor.java

License:Apache License

/**
 * Method which constructs the ARFF file for Weka with the training data.
 */
public static void constructModel() {
    Instances instdata = null;
    try {
        FastVector atts;
        atts = new FastVector();
        atts.addElement(new Attribute("content", (FastVector) null));
        FastVector fvClassVal = new FastVector(4);
        fvClassVal.addElement("");
        fvClassVal.addElement("neutral");
        fvClassVal.addElement("negative");
        fvClassVal.addElement("positive");
        Attribute ClassAttribute = new Attribute("Class", fvClassVal);
        atts.addElement(ClassAttribute);
        instdata = new Instances("tweetData", atts, 0);
        CsvReader data = new CsvReader("../classified data/traindata.csv");
        int i = 0;
        while (data.readRecord()) {
            double[] vals = new double[instdata.numAttributes()];
            String class_id = data.get(0);
            switch (Integer.parseInt(class_id)) {
            case 0:
                class_id = "negative";
                break;
            case 2:
                class_id = "neutral";
                break;
            case 4:
                class_id = "positive";
                break;
            }
            String tweet_content = data.get(5);
            Instance iInst = new Instance(2);
            iInst.setValue((Attribute) atts.elementAt(0), tweet_content);
            iInst.setValue((Attribute) atts.elementAt(1), class_id);
            instdata.add(iInst);
            System.out.println("[" + i + "] " + class_id + ":" + tweet_content);
            i++;
        }
        data.close();
        StringToWordVector filter = new StringToWordVector();
        instdata.setClassIndex(instdata.numAttributes() - 1);
        filter.setInputFormat(instdata);
        Instances newdata = Filter.useFilter(instdata, filter);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(newdata);
        saver.setFile(new File("./data/train2data.arff"));
        saver.writeBatch();
    } catch (Exception ex) {
        Logger.getLogger(TweetFeatureExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.entopix.maui.filters.MauiFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        log.info("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        log.info(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        double[] probs = null;
        try {
            // Get probability of a phrase being key phrase
            probs = classifier.distributionForInstance(inst);
        } catch (Exception e) {
            log.error("Exception while getting probability for candidate " + candidate.getName());
            continue;
        }

        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // 0 Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // 1 Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                // 2
                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex); // 3
                newInst[pos++] = inst.value(idfIndex); // 4
                newInst[pos++] = inst.value(tfidfIndex); // 5
                newInst[pos++] = inst.value(firstOccurIndex); // 6
                newInst[pos++] = inst.value(lastOccurIndex); // 7
                newInst[pos++] = inst.value(spreadOccurIndex); // 8
                newInst[pos++] = inst.value(domainKeyphIndex); // 9
                newInst[pos++] = inst.value(lengthIndex); // 10 
                newInst[pos++] = inst.value(generalityIndex); // 11
                newInst[pos++] = inst.value(nodeDegreeIndex); // 12
                newInst[pos++] = inst.value(invWikipFreqIndex); // 13
                newInst[pos++] = inst.value(totalWikipKeyphrIndex); // 14
                newInst[pos++] = inst.value(wikipGeneralityIndex); // 15

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob; // 16

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue(); // 17

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }

    }
    if (debugMode) {
        log.info(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // log.info(vals[i] + "\t" + currentInstance);

        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}

From source file:com.gamerecommendation.Weatherconditions.Clasificacion.java

public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";

    InputStream classModelStream;
    classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);
    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    //Here the dataset is created, containing all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);

    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        //System.out.println(i + " " + atributs.get(i)  + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    //Add the instance to be evaluated.
    dataTest.add(setPrueba);

    //Make the prediction.
    //Instance 0 is used because it is the only one in the dataset.
    double valorP = clasify.classifyInstance(dataTest.instance(0));
    //get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}
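
An illustrative call (not part of the original source): the eight test values must match, in order, the nominal vocabularies declared above.

String[] testCase = { "Sunny", "Hot", "Mint", "Fair", "Fair", "Normal", "Recommended", "Mint" };
String prediction = new Clasificacion().clasificar(testCase);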

From source file:com.openkm.kea.filter.KEAFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    if (m_Debug) {
        log.info("-- Converting instance");
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    HashMap<String, Counter> hashKeysEval = null;
    if (!instance.isMissing(m_KeyphrasesAtt)) {
        String keyphrases = instance.stringValue(m_KeyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        hashKeysEval = getGivenKeyphrases(keyphrases, true);
    }

    // Get the phrases for the document
    HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
    int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));
    //   hash = getComposits(hash);

    /* Experimental:
     To compute how many of the manual keyphrases appear in the documents:
            
    log.info("Doc phrases found " + hash.size());
    log.info("Manual keyphrases: ");
    Iterator iter = hashKeyphrases.keySet().iterator();
    int count = 0;
    while (iter.hasNext()) {
       String id = (String)iter.next();
       if (hash.containsKey(id)) {
    count++;
       }
    }
            
    double max_recall = (double)count/(double)hashKeyphrases.size();
            
            
    m_max_recall += max_recall;
    doc++;
    double avg_m_max_recall = m_max_recall/(double)doc;
            
    String file = instance.stringValue(2);
    log.info(count + " out of " + hashKeyphrases.size() + " are in the document ");
    log.info("Max recall : " + avg_m_max_recall + " on " + doc + " documents ");
    */

    // Compute number of extra attributes
    int numFeatures = 5;
    if (m_Debug) {
        if (m_KFused) {
            numFeatures = numFeatures + 1;
        }
    }
    if (m_STDEVfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_NODEfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_LENGTHfeature) {
        numFeatures = numFeatures + 1;
    }

    // Set indices of key attributes
    //int phraseAttIndex = m_DocumentAtt;
    int tfidfAttIndex = m_DocumentAtt + 2;
    int distAttIndex = m_DocumentAtt + 3;
    int probsAttIndex = m_DocumentAtt + numFeatures - 1;
    //int classAttIndex = numFeatures;

    // Go through the phrases and convert them into instances
    Iterator<String> it = hash.keySet().iterator();
    while (it.hasNext()) {
        String id = it.next();
        FastVector phraseInfo = (FastVector) hash.get(id);

        double[] vals = featVals(id, phraseInfo, training, hashKeysEval, hashKeyphrases, length, hash);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(m_ClassifierData);

        // Get probability of a phrase being key phrase
        double[] probs = m_Classifier.distributionForInstance(inst);

        // If simple Naive Bayes used, change here to
        //double prob = probs[1];
        double prob = probs[0];

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures];
        int pos = 0;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (i == m_DocumentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(id);
                newInst[pos++] = index;

                // Add original version
                String orig = (String) phraseInfo.elementAt(2);

                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(id);
                }
                newInst[pos++] = index;

                // Add TFxIDF
                newInst[pos++] = inst.value(m_TfidfIndex);

                // Add distance
                newInst[pos++] = inst.value(m_FirstOccurIndex);

                // Add other features
                if (m_Debug) {
                    if (m_KFused) {
                        newInst[pos++] = inst.value(m_KeyFreqIndex);
                    }
                }
                if (m_STDEVfeature) {
                    newInst[pos++] = inst.value(m_STDEVIndex);
                }
                if (m_NODEfeature) {
                    newInst[pos++] = inst.value(m_NodeIndex);
                }
                if (m_LENGTHfeature) {
                    newInst[pos++] = inst.value(m_LengthIndex);
                }

                // Add probability 
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == m_KeyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
    }

    // Add dummy instances for keyphrases that don't occur
    // in the document
    if (hashKeysEval != null) {
        Iterator<String> phrases = hashKeysEval.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = phrases.next();
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {
                    // log.info("Here: " + phrase);
                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add TFxIDF
                    newInst[pos++] = Instance.missingValue();

                    // Add distance
                    newInst[pos++] = Instance.missingValue();

                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                    }
                    if (m_STDEVfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_NODEfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_LENGTHfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }

                    // Add probability and rank
                    newInst[pos++] = -Double.MAX_VALUE;
                    // newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = 1; // Keyphrase
                } else {
                    newInst[pos++] = instance.value(i);
                }
            }

            // Create the dummy instance once per phrase, after all of its
            // attribute values have been filled in (not once per attribute).
            Instance inst = new Instance(instance.weight(), newInst);
            inst.setDataset(outputFormatPeek());
            vector.addElement(inst);
        }
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }
    return vector;
}

From source file:com.reactivetechnologies.analytics.mapper.TEXTDataMapper.java

License:Open Source License

@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {

    if (request != null && request.getData() != null && request.getData().length > 0) {
        FastVector fvWekaAttributes = new FastVector(2);
        FastVector nil = null;
        Attribute attr0 = new Attribute("text", nil, 0);
        FastVector fv = new FastVector();
        for (String nominal : request.getClassVars()) {
            fv.addElement(nominal);
        }
        Attribute attr1 = new Attribute("class", fv, 1);

        fvWekaAttributes.addElement(attr0);
        fvWekaAttributes.addElement(attr1);

        Instances ins = new Instances("attr-reln", fvWekaAttributes, request.getData().length);
        ins.setClassIndex(1);
        for (Text s : request.getData()) {
            Instance i = new Instance(2);
            i.setValue(attr0, s.getText());
            i.setValue(attr1, s.getTclass());
            ins.add(i);

        }

        return new Dataset(ins);
    }
    return null;
}

From source file:com.tum.classifiertest.FastRfUtils.java

License:Open Source License

/**
 * Produces a random permutation of the values of an attribute in a dataset using a Knuth shuffle.
 * <p/>
 * Copies back the current values of the previously scrambled attribute and uses the given permutation
 * to scramble the values of the new attribute, copying from the original dataset.
 *
 * @param src      the source dataset
 * @param dst      the scrambled dataset
 * @param attIndex the attribute index
 * @param perm     the random permutation
 *
 * @return fluent
 */
public static Instances scramble(Instances src, Instances dst, final int attIndex, int[] perm) {

    for (int i = 0; i < src.numInstances(); i++) {

        Instance scrambled = dst.instance(i);

        if (attIndex > 0)
            scrambled.setValue(attIndex - 1, src.instance(i).value(attIndex - 1));
        scrambled.setValue(attIndex, src.instance(perm[i]).value(attIndex));
    }

    return dst;
}
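
A sketch of one way to call scramble: build a random permutation with a Knuth shuffle, copy the dataset, and scramble a single attribute. The train variable and the fixed seed are illustrative.

java.util.Random rnd = new java.util.Random(42);
int n = train.numInstances();
int[] perm = new int[n];
for (int i = 0; i < n; i++)
    perm[i] = i;
for (int i = n - 1; i > 0; i--) { // Knuth shuffle
    int j = rnd.nextInt(i + 1);
    int tmp = perm[i];
    perm[i] = perm[j];
    perm[j] = tmp;
}
Instances scrambled = FastRfUtils.scramble(train, new Instances(train), 0, perm);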

From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java

License:Apache License

/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}.
 */
private static Instance convert(AnthURL url) {
    if (url != null) {

        Instance inst = new SparseInstance(dimension);
        inst.replaceMissingValues(replaceMissingValues);

        inst.setDataset(instances);
        inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
        inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("domain"), url.uri.getHost());
        Set<String> tokens = new HashSet<String>();

        tokens.addAll(tokenizer(url.uri.getPath()));
        tokens.addAll(tokenizer(url.uri.getQuery()));
        tokens.addAll(tokenizer(url.uri.getFragment()));
        for (String tok : tokens) {
            inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
        }
        return inst;

    } else {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }
}

From source file:com.zazhu.BlueHub.BlueHub.java

License:Apache License

/**
 * Receives the latest sensor readings and creates the features; only the
 * accelerometer x, y, z values are used (from either the internal or the
 * external sensor).
 *
 * @param sensorQueue the queue of raw sensor readings
 * @param whatSensor selects the sensor source (EXTERNAL or INTERNAL)
 * @return the new instance, or null if the sensor type is unknown
 * @throws Exception if the queue is empty
 */
private Instance processingSenseData(Queue<String> sensorQueue, char whatSensor) throws Exception {

    BufferedReader reader;
    Instances format;
    Instance newInstance = null;

    Log.d(TAG, "Queue size = " + mQueueSize);

    if (sensorQueue.size() <= 0)
        throw new Exception("Queue empty");

    // create the arrays that will contain the accelerometer data
    // s.x s.y s.z
    double[] sx = new double[sensorQueue.size()];
    double[] sy = new double[sensorQueue.size()];
    double[] sz = new double[sensorQueue.size()];

    String rawReading;
    StringTokenizer st;

    int index;

    if (D)
        Log.e(TAG, "+++ COMPUTING FEATURES +++");

    // 1. collect raw data. what kind of sensing data? external vs. internal
    switch (whatSensor) {
    case EXTERNAL:
        index = 0;
        while ((rawReading = sensorQueue.poll()) != null) {
            // FORMAT:
            // "Time_SensorName_SensorNumber_Counter_Xacc_Yacc_Zacc_Xgyro_Ygyro_checksum"
            // position of the values needed: s.x = 4, s.y = 5, s.z = 6
            st = new StringTokenizer(rawReading, FIELD_SEP);
            // not needed data
            for (int i = 0; i < 4; i++)
                st.nextToken();
            // s.x, s.y, s.z
            sx[index] = Double.valueOf(st.nextToken());
            sy[index] = Double.valueOf(st.nextToken());
            sz[index] = Double.valueOf(st.nextToken());

            index += 1;
        }

        // 2. process raw data
        // 2.1 read the input format for the instance (TODO must be changed to
        // use weka classes)
        reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_extern)));

        try {
            format = new Instances(reader);

            if (format.classIndex() == -1)
                format.setClassIndex(format.numAttributes() - 1);

            // 2.2 create a new instance
            newInstance = new DenseInstance(7);
            newInstance.setDataset(format);
            // set attributes
            newInstance.setValue(format.attribute(0), Feature.getStd(sx));
            newInstance.setValue(format.attribute(1), Feature.getStd(sy));
            newInstance.setValue(format.attribute(2), Feature.getStd(sz));
            newInstance.setValue(format.attribute(3), Feature.getMean(sx));
            newInstance.setValue(format.attribute(4), Feature.getMean(sy));
            newInstance.setValue(format.attribute(5), Feature.getMean(sz));
            // set unknown class
            newInstance.setMissing(format.attribute(6));

        } catch (IOException e) {
            e.printStackTrace();
        }

        break;
    case INTERNAL:

        index = 0;
        while ((rawReading = sensorQueue.poll()) != null) {

            // FORMAT "Xacc_Yacc_Zacc"
            // position of the values needed: s.x = 0, s.y = 1, s.z = 2
            st = new StringTokenizer(rawReading, FIELD_SEP);

            // s.x, s.y, s.z
            sx[index] = Double.valueOf(st.nextToken());
            sy[index] = Double.valueOf(st.nextToken());
            sz[index] = Double.valueOf(st.nextToken());

            index += 1;
        }

        // 2. process raw data
        // 2.1 read the input format for the instance (TODO must be changed to
        // use weka classes)
        reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_intern)));

        try {
            format = new Instances(reader);

            if (format.classIndex() == -1)
                format.setClassIndex(format.numAttributes() - 1);

            // 2.2 create a new instance
            newInstance = new DenseInstance(7);
            newInstance.setDataset(format);
            // set attributes
            newInstance.setValue(format.attribute(0), Feature.getStd(sx));
            newInstance.setValue(format.attribute(1), Feature.getStd(sy));
            newInstance.setValue(format.attribute(2), Feature.getStd(sz));
            newInstance.setValue(format.attribute(3), Feature.getMean(sx));
            newInstance.setValue(format.attribute(4), Feature.getMean(sy));
            newInstance.setValue(format.attribute(5), Feature.getMean(sz));
            // set unknown class
            newInstance.setMissing(format.attribute(6));

        } catch (IOException e) {
            e.printStackTrace();
        }

        break;
    default:
        if (D)
            Log.e(TAG, "+++ COMPUTING FEATURES: NO VALUE FOR THE SENSOR READING +++");
        break;
    }

    return newInstance;

}