Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

This page presents usage examples for weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Document

Sets the value of a nominal or string attribute to the given value.
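
A minimal, self-contained sketch of the call is shown below. It is written against the Weka 3.6 API (FastVector and a concrete Instance class) used throughout this page; the relation, attribute, and value names are illustrative.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class SetValueDemo {
    public static void main(String[] args) {
        // A string attribute and a two-valued nominal attribute.
        Attribute text = new Attribute("text", (FastVector) null);
        FastVector labels = new FastVector(2);
        labels.addElement("yes");
        labels.addElement("no");
        Attribute cls = new Attribute("class", labels);

        FastVector atts = new FastVector(2);
        atts.addElement(text);
        atts.addElement(cls);
        // Creating the Instances header assigns each attribute its index.
        Instances data = new Instances("demo", atts, 0);

        Instance inst = new Instance(2);
        inst.setValue(text, "some text"); // string attribute: the value is added to the attribute
        inst.setValue(cls, "yes");        // nominal attribute: the value must be a declared label
        inst.setDataset(data);
        data.add(inst);
    }
}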

Usage

From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java

License:Open Source License

private void train() throws Exception {
    models = new M5P[ModelNum];
    for (int i = 0; i < ModelNum; i++) {
        models[i] = buildModel(labeledInstances, M[i]);
    }

    for (int i = 0; i < this.comtIterations; i++) {
        ArrayList<Instance>[] InstancePiSet = new ArrayList[ModelNum];
        for (int j = 0; j < ModelNum; j++)
            InstancePiSet[j] = new ArrayList<Instance>();

        for (int m = 0; m < ModelNum; m++) {
            double maxDelta = 0;
            Instance maxDeltaXY = null;
            Enumeration<Instance> enu = this.unlabeledInstances.enumerateInstances();

            while (enu.hasMoreElements()) {
                Instance ulIns = enu.nextElement();
                Instances omega = getSiblings(models[m], ulIns);
                double y = models[m].classifyInstance(ulIns);
                if (indexOfClass == -1)
                    indexOfClass = labeledInstances.classIndex();
                ulIns.setValue(indexOfClass, y);

                Instances instancesPi = new Instances(models[m].getM5RootNode().zyqGetTrainingSet());
                instancesPi.add(ulIns);
                M5P modelPi = buildModel(instancesPi, M[m]);
                double delta = computeOmegaDelta(models[m], modelPi, omega);
                if (maxDelta < delta) {
                    maxDelta = delta;
                    maxDeltaXY = ulIns;
                }
            }

            //keep the unlabeled instance that yielded the largest improvement, if any
            if (maxDelta > 0) {
                InstancePiSet[m].add(maxDeltaXY);
                this.unlabeledInstances.delete(this.unlabeledInstances.indexOf(maxDeltaXY));
            }
        } //end of the loop over both models

        boolean toExit = true;
        for (int m = 0; m < ModelNum; m++) {
            if (InstancePiSet[m].size() > 0) {
                toExit = false;
                break;
            }
        }

        if (toExit)
            break;
        else {
            //update the models
            int toGen = 0;
            for (int m = 0; m < ModelNum; m++) {
                Instances set = models[m].getM5RootNode().zyqGetTrainingSet();
                toGen += InstancePiSet[m].size();
                for (Instance ins : InstancePiSet[m])
                    set.add(ins);

                models[m] = buildModel(set, M[m]);
            }

            //Replenish pool U' to size p
            Instances toAdd = retrieveMore(toGen);
            unlabeledInstances.addAll(toAdd);
        } //we will go to another round of iteration
    } //iterate for a number of rounds or break out on empty InstancesPiSets

    //now we have the model as y = 0.5*sum(models[m].predict(x))
}
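
The closing comment above describes how the trained pair of models is meant to be used at prediction time. A hypothetical helper making that averaging explicit (not part of the original class; models and ModelNum are the fields used in train()) could look like:

private double predict(Instance x) throws Exception {
    // Average the predictions of the co-trained regressors.
    double sum = 0;
    for (int m = 0; m < ModelNum; m++)
        sum += models[m].classifyInstance(x);
    return sum / ModelNum; // with ModelNum == 2 this is 0.5 * sum
}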

From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java

License:Apache License

/**
 * Method that converts a text message into an instance.
 *
 * @param text the message content to convert
 * @param data the header information
 * @return the generated Instance
 */
private Instance makeInstance(String text, Instances data) {
    Instance instance = new Instance(2);
    Attribute messageAtt = data.attribute("content");
    instance.setValue(messageAtt, messageAtt.addStringValue(text));
    instance.setDataset(data);
    return instance;
}
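
For context, a sketch of the header Instances that makeInstance expects as its data argument follows, assuming the same Weka 3.6 FastVector API used elsewhere on this page; the relation name and class labels are illustrative.

FastVector atts = new FastVector(2);
atts.addElement(new Attribute("content", (FastVector) null)); // string attribute
FastVector classVals = new FastVector(2);
classVals.addElement("positive");
classVals.addElement("negative");
atts.addElement(new Attribute("class", classVals));
Instances data = new Instances("tweets", atts, 0);
data.setClassIndex(data.numAttributes() - 1);

Instance inst = makeInstance("great game today", data); // called from within the class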

From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetFeatureExtractor.java

License:Apache License

/**
 * Method which constructs the ARFF file for Weka with the training data.
 */
public static void constructModel() {
    Instances instdata = null;
    try {
        FastVector atts;
        atts = new FastVector();
        atts.addElement(new Attribute("content", (FastVector) null));
        FastVector fvClassVal = new FastVector(4);
        fvClassVal.addElement("");
        fvClassVal.addElement("neutral");
        fvClassVal.addElement("negative");
        fvClassVal.addElement("positive");
        Attribute ClassAttribute = new Attribute("Class", fvClassVal);
        atts.addElement(ClassAttribute);
        instdata = new Instances("tweetData", atts, 0);
        CsvReader data = new CsvReader("../classified data/traindata.csv");
        int i = 0;
        while (data.readRecord()) {
            double[] vals = new double[instdata.numAttributes()];
            String class_id = data.get(0);
            switch (Integer.parseInt(class_id)) {
            case 0:
                class_id = "negative";
                break;
            case 2:
                class_id = "neutral";
                break;
            case 4:
                class_id = "positive";
                break;
            }
            String tweet_content = data.get(5);
            Instance iInst = new Instance(2);
            iInst.setValue((Attribute) atts.elementAt(0), tweet_content);
            iInst.setValue((Attribute) atts.elementAt(1), class_id);
            instdata.add(iInst);
            System.out.println("[" + i + "] " + class_id + ":" + tweet_content);
            i++;
        }
        data.close();
        StringToWordVector filter = new StringToWordVector();
        instdata.setClassIndex(instdata.numAttributes() - 1);
        filter.setInputFormat(instdata);
        Instances newdata = Filter.useFilter(instdata, filter);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(newdata);
        saver.setFile(new File("./data/train2data.arff"));
        saver.writeBatch();
    } catch (Exception ex) {
        Logger.getLogger(TweetFeatureExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.entopix.maui.filters.MauiFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        log.info("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        log.info(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        double[] probs = null;
        try {
            // Get probability of a phrase being key phrase
            probs = classifier.distributionForInstance(inst);
        } catch (Exception e) {
            log.error("Exception while getting probability for candidate " + candidate.getName());
            continue;
        }

        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // 0 Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // 1 Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                // 2
                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex); // 3
                newInst[pos++] = inst.value(idfIndex); // 4
                newInst[pos++] = inst.value(tfidfIndex); // 5
                newInst[pos++] = inst.value(firstOccurIndex); // 6
                newInst[pos++] = inst.value(lastOccurIndex); // 7
                newInst[pos++] = inst.value(spreadOccurIndex); // 8
                newInst[pos++] = inst.value(domainKeyphIndex); // 9
                newInst[pos++] = inst.value(lengthIndex); // 10 
                newInst[pos++] = inst.value(generalityIndex); // 11
                newInst[pos++] = inst.value(nodeDegreeIndex); // 12
                newInst[pos++] = inst.value(invWikipFreqIndex); // 13
                newInst[pos++] = inst.value(totalWikipKeyphrIndex); // 14
                newInst[pos++] = inst.value(wikipGeneralityIndex); // 15

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob; // 16

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue(); // 17

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }

    }
    if (debugMode) {
        log.info(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // log.info(vals[i] + "\t" + currentInstance);

        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}

From source file:com.gamerecommendation.Weatherconditions.Clasificacion.java

public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";

    InputStream classModelStream;
    classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);
    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    //Here the dataset is created, containing all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);

    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        //System.out.println(i + " " + atributs.get(i)  + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    //Add the instance to be evaluated.
    dataTest.add(setPrueba);

    //Make the prediction.
    //Instance 0 is used because it is the only one in the dataset.
    double valorP = clasify.classifyInstance(dataTest.instance(0));
    //get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}
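
An illustrative call (not part of the original source): the eight test values must match, in order, the nominal vocabularies declared above.

String[] testCase = { "Sunny", "Hot", "Mint", "Fair", "Fair", "Normal", "Recommended", "Mint" };
String prediction = new Clasificacion().clasificar(testCase);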

From source file:com.openkm.kea.filter.KEAFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    if (m_Debug) {
        log.info("-- Converting instance");
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    HashMap<String, Counter> hashKeysEval = null;
    if (!instance.isMissing(m_KeyphrasesAtt)) {
        String keyphrases = instance.stringValue(m_KeyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        hashKeysEval = getGivenKeyphrases(keyphrases, true);
    }

    // Get the phrases for the document
    HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
    int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));
    //   hash = getComposits(hash);

    /* Experimental:
     To compute how many of the manual keyphrases appear in the documents:
            
    log.info("Doc phrases found " + hash.size());
    log.info("Manual keyphrases: ");
    Iterator iter = hashKeyphrases.keySet().iterator();
    int count = 0;
    while (iter.hasNext()) {
       String id = (String)iter.next();
       if (hash.containsKey(id)) {
    count++;
       }
    }
            
    double max_recall = (double)count/(double)hashKeyphrases.size();
            
            
    m_max_recall += max_recall;
    doc++;
    double avg_m_max_recall = m_max_recall/(double)doc;
            
    String file = instance.stringValue(2);
    log.info(count + " out of " + hashKeyphrases.size() + " are in the document ");
    log.info("Max recall : " + avg_m_max_recall + " on " + doc + " documents ");
    */

    // Compute number of extra attributes
    int numFeatures = 5;
    if (m_Debug) {
        if (m_KFused) {
            numFeatures = numFeatures + 1;
        }
    }
    if (m_STDEVfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_NODEfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_LENGTHfeature) {
        numFeatures = numFeatures + 1;
    }

    // Set indices of key attributes
    //int phraseAttIndex = m_DocumentAtt;
    int tfidfAttIndex = m_DocumentAtt + 2;
    int distAttIndex = m_DocumentAtt + 3;
    int probsAttIndex = m_DocumentAtt + numFeatures - 1;
    //int classAttIndex = numFeatures;

    // Go through the phrases and convert them into instances
    Iterator<String> it = hash.keySet().iterator();
    while (it.hasNext()) {
        String id = it.next();
        FastVector phraseInfo = (FastVector) hash.get(id);

        double[] vals = featVals(id, phraseInfo, training, hashKeysEval, hashKeyphrases, length, hash);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(m_ClassifierData);

        // Get probability of a phrase being key phrase
        double[] probs = m_Classifier.distributionForInstance(inst);

        // If simple Naive Bayes used, change here to
        //double prob = probs[1];
        double prob = probs[0];

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures];
        int pos = 0;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (i == m_DocumentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(id);
                newInst[pos++] = index;

                // Add original version
                String orig = (String) phraseInfo.elementAt(2);

                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(id);
                }
                newInst[pos++] = index;

                // Add TFxIDF
                newInst[pos++] = inst.value(m_TfidfIndex);

                // Add distance
                newInst[pos++] = inst.value(m_FirstOccurIndex);

                // Add other features
                if (m_Debug) {
                    if (m_KFused) {
                        newInst[pos++] = inst.value(m_KeyFreqIndex);
                    }
                }
                if (m_STDEVfeature) {
                    newInst[pos++] = inst.value(m_STDEVIndex);
                }
                if (m_NODEfeature) {
                    newInst[pos++] = inst.value(m_NodeIndex);
                }
                if (m_LENGTHfeature) {
                    newInst[pos++] = inst.value(m_LengthIndex);
                }

                // Add probability 
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == m_KeyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
    }

    // Add dummy instances for keyphrases that don't occur
    // in the document
    if (hashKeysEval != null) {
        Iterator<String> phrases = hashKeysEval.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = phrases.next();
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {
                    // log.info("Here: " + phrase);
                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add TFxIDF
                    newInst[pos++] = Instance.missingValue();

                    // Add distance
                    newInst[pos++] = Instance.missingValue();

                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                    }
                    if (m_STDEVfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_NODEfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_LENGTHfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }

                    // Add probability and rank
                    newInst[pos++] = -Double.MAX_VALUE;
                    // newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = 1; // Keyphrase
                } else {
                    newInst[pos++] = instance.value(i);
                }
            }

            // Create the dummy instance once per phrase, after all of its
            // attribute values have been filled in (not once per attribute).
            Instance inst = new Instance(instance.weight(), newInst);
            inst.setDataset(outputFormatPeek());
            vector.addElement(inst);
        }
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }
    return vector;
}

From source file:com.reactivetechnologies.analytics.mapper.TEXTDataMapper.java

License:Open Source License

@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {

    if (request != null && request.getData() != null && request.getData().length > 0) {
        FastVector fvWekaAttributes = new FastVector(2);
        FastVector nil = null;
        Attribute attr0 = new Attribute("text", nil, 0);
        FastVector fv = new FastVector();
        for (String nominal : request.getClassVars()) {
            fv.addElement(nominal);
        }
        Attribute attr1 = new Attribute("class", fv, 1);

        fvWekaAttributes.addElement(attr0);
        fvWekaAttributes.addElement(attr1);

        Instances ins = new Instances("attr-reln", fvWekaAttributes, request.getData().length);
        ins.setClassIndex(1);
        for (Text s : request.getData()) {
            Instance i = new Instance(2);
            i.setValue(attr0, s.getText());
            i.setValue(attr1, s.getTclass());
            ins.add(i);

        }

        return new Dataset(ins);
    }
    return null;
}

From source file:com.tum.classifiertest.FastRfUtils.java

License:Open Source License

/**
 * Produces a random permutation of the values of an attribute in a dataset using a Knuth shuffle.
 * <p/>
 * Copies back the current values of the previously scrambled attribute and uses the given permutation
 * to scramble the values of the new attribute, copying from the original dataset.
 *
 * @param src      the source dataset
 * @param dst      the scrambled dataset
 * @param attIndex the attribute index
 * @param perm     the random permutation
 *
 * @return fluent
 */
public static Instances scramble(Instances src, Instances dst, final int attIndex, int[] perm) {

    for (int i = 0; i < src.numInstances(); i++) {

        Instance scrambled = dst.instance(i);

        if (attIndex > 0)
            scrambled.setValue(attIndex - 1, src.instance(i).value(attIndex - 1));
        scrambled.setValue(attIndex, src.instance(perm[i]).value(attIndex));
    }

    return dst;
}
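
A sketch of one way to call scramble: build a random permutation with a Knuth shuffle, copy the dataset, and scramble a single attribute. The train variable and the fixed seed are illustrative.

java.util.Random rnd = new java.util.Random(42);
int n = train.numInstances();
int[] perm = new int[n];
for (int i = 0; i < n; i++)
    perm[i] = i;
for (int i = n - 1; i > 0; i--) { // Knuth shuffle
    int j = rnd.nextInt(i + 1);
    int tmp = perm[i];
    perm[i] = perm[j];
    perm[j] = tmp;
}
Instances scrambled = FastRfUtils.scramble(train, new Instances(train), 0, perm);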

From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java

License:Apache License

/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}.
 */
private static Instance convert(AnthURL url) {
    if (url != null) {

        Instance inst = new SparseInstance(dimension);
        inst.replaceMissingValues(replaceMissingValues);

        inst.setDataset(instances);
        inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
        inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("domain"), url.uri.getHost());
        Set<String> tokens = new HashSet<String>();

        tokens.addAll(tokenizer(url.uri.getPath()));
        tokens.addAll(tokenizer(url.uri.getQuery()));
        tokens.addAll(tokenizer(url.uri.getFragment()));
        for (String tok : tokens) {
            inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
        }
        return inst;

    } else {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }
}

From source file:com.zazhu.BlueHub.BlueHub.java

License:Apache License

/**
 * Receives the latest sensor readings and creates the features; only the
 * accelerometer x, y, z values are used (from either the internal or the
 * external sensor).
 *
 * @param sensorQueue the queue of raw sensor readings
 * @param whatSensor selects the sensor source (EXTERNAL or INTERNAL)
 * @return the new instance, or null if the sensor type is unknown
 * @throws Exception if the queue is empty
 */
private Instance processingSenseData(Queue<String> sensorQueue, char whatSensor) throws Exception {

    BufferedReader reader;
    Instances format;
    Instance newInstance = null;

    Log.d(TAG, "Queue size = " + mQueueSize);

    if (sensorQueue.size() <= 0)
        throw new Exception("Queue empty");

    // create the arrays that will contain the accelerometer data
    // s.x s.y s.z
    double[] sx = new double[sensorQueue.size()];
    double[] sy = new double[sensorQueue.size()];
    double[] sz = new double[sensorQueue.size()];

    String rawReading;
    StringTokenizer st;

    int index;

    if (D)
        Log.e(TAG, "+++ COMPUTING FEATURES +++");

    // 1. collect raw data. what kind of sensing data? external vs. internal
    switch (whatSensor) {
    case EXTERNAL:
        index = 0;
        while ((rawReading = sensorQueue.poll()) != null) {
            // FORMAT:
            // "Time_SensorName_SensorNumber_Counter_Xacc_Yacc_Zacc_Xgyro_Ygyro_checksum"
            // position of the values needed: s.x = 4, s.y = 5, s.z = 6
            st = new StringTokenizer(rawReading, FIELD_SEP);
            // not needed data
            for (int i = 0; i < 4; i++)
                st.nextToken();
            // s.x, s.y, s.z
            sx[index] = Double.valueOf(st.nextToken());
            sy[index] = Double.valueOf(st.nextToken());
            sz[index] = Double.valueOf(st.nextToken());

            index += 1;
        }

        // 2. process raw data
        // 2.1 read the input format for the instance (TODO must be changed to
        // use weka classes)
        reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_extern)));

        try {
            format = new Instances(reader);

            if (format.classIndex() == -1)
                format.setClassIndex(format.numAttributes() - 1);

            // 2.2 create a new instance
            newInstance = new DenseInstance(7);
            newInstance.setDataset(format);
            // set attributes
            newInstance.setValue(format.attribute(0), Feature.getStd(sx));
            newInstance.setValue(format.attribute(1), Feature.getStd(sy));
            newInstance.setValue(format.attribute(2), Feature.getStd(sz));
            newInstance.setValue(format.attribute(3), Feature.getMean(sx));
            newInstance.setValue(format.attribute(4), Feature.getMean(sy));
            newInstance.setValue(format.attribute(5), Feature.getMean(sz));
            // set unknown class
            newInstance.setMissing(format.attribute(6));

        } catch (IOException e) {
            e.printStackTrace();
        }

        break;
    case INTERNAL:

        index = 0;
        while ((rawReading = sensorQueue.poll()) != null) {

            // FORMAT "Xacc_Yacc_Zacc"
            // position of the values needed: s.x = 0, s.y = 1, s.z = 2
            st = new StringTokenizer(rawReading, FIELD_SEP);

            // s.x, s.y, s.z
            sx[index] = Double.valueOf(st.nextToken());
            sy[index] = Double.valueOf(st.nextToken());
            sz[index] = Double.valueOf(st.nextToken());

            index += 1;
        }

        // 2. process raw data
        // 2.1 read the input format for the instance (TODO must be changed to
        // use weka classes)
        reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_intern)));

        try {
            format = new Instances(reader);

            if (format.classIndex() == -1)
                format.setClassIndex(format.numAttributes() - 1);

            // 2.2 create a new instance
            newInstance = new DenseInstance(7);
            newInstance.setDataset(format);
            // set attributes
            newInstance.setValue(format.attribute(0), Feature.getStd(sx));
            newInstance.setValue(format.attribute(1), Feature.getStd(sy));
            newInstance.setValue(format.attribute(2), Feature.getStd(sz));
            newInstance.setValue(format.attribute(3), Feature.getMean(sx));
            newInstance.setValue(format.attribute(4), Feature.getMean(sy));
            newInstance.setValue(format.attribute(5), Feature.getMean(sz));
            // set unknown class
            newInstance.setMissing(format.attribute(6));

        } catch (IOException e) {
            e.printStackTrace();
        }

        break;
    default:
        if (D)
            Log.e(TAG, "+++ COMPUTING FEATURES: NO VALUE FOR THE SENSOR READING +++");
        break;
    }

    return newInstance;

}