Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

On this page you can find example usages of weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Source Link

Document

Sets the value of a nominal or string attribute to the given value.

Usage

From source file:processes.ClusterProcess.java

/**
 * Builds a Weka instance from the taste string of the given user.
 *
 * The taste string is prefixed with "0", split on commas, and each field is parsed as an
 * integer and stored at the attribute index matching its position.
 *
 * NOTE(review): the {@code dataSet} parameter is never read; the instance is sized from and
 * bound to the {@code userPoints} field instead - confirm this is intended.
 *
 * @param user    the user whose taste string is converted
 * @param dataSet unused by this method
 * @return the populated instance, attached to {@code userPoints}
 */
private Instance toInstance(User user, Instances dataSet) {
    Instance userInstance = new Instance(userPoints.numAttributes());
    userInstance.setDataset(userPoints);

    String tasteCsv = "0" + user.getTasteString(LastFMDataHandler.getInitialTagCount());
    int position = 0;
    for (String field : tasteCsv.split(",")) {
        userInstance.setValue(position, Integer.parseInt(field));
        position++;
    }
    return userInstance;
}

From source file:put.semantic.fcanew.ml.WekaClassifier.java

/**
 * Copies an instance from one dataset layout into another by matching attribute names.
 *
 * For every attribute of {@code dst}, the attribute with the same name is looked up in
 * {@code src}; numeric attributes are copied as doubles, all others as strings.
 *
 * @param input the instance to convert
 * @param src   dataset whose attributes are used to read {@code input}
 * @param dst   dataset describing the layout of the returned instance
 * @return a new instance attached to {@code dst}
 */
public static Instance convert(Instance input, Instances src, Instances dst) {
    final int attributeCount = dst.numAttributes();
    Instance converted = new Instance(attributeCount);
    converted.setDataset(dst);

    for (int index = 0; index < attributeCount; index++) {
        String attributeName = dst.attribute(index).name();
        Attribute sourceAttribute = src.attribute(attributeName);
        if (!sourceAttribute.isNumeric()) {
            converted.setValue(index, input.stringValue(sourceAttribute));
            continue;
        }
        converted.setValue(index, input.value(sourceAttribute));
    }
    return converted;
}

From source file:qa.experiment.ProcessFeatureVector.java

/**
 * Trains a Naive Bayes classifier on the stored feature vectors whose process names fuzzily
 * match one of {@code processNames}, then classifies {@code question} and returns the most
 * probable process name.
 *
 * Each training row is a tab-separated feature vector; its first {@code bowFeature.size()}
 * fields are parsed as integers and the class value is the matched entry of
 * {@code processNames}. The question is represented by the frequency of every bag-of-words
 * feature among its tokens.
 *
 * @param processNames candidate process names (also the class labels)
 * @param question     the question text to classify
 * @return the element of {@code processNames} with the highest predicted probability
 * @throws Exception if building the classifier or computing the distribution fails
 */
public String trainAndPredict(String[] processNames, String question) throws Exception {
    FastVector fvWekaAttribute = generateWEKAFeatureVector(processNames);
    // Read the feature count only after the attribute vector has been generated, in the same
    // position the original first consulted bowFeature.
    final int featureCount = bowFeature.size();
    Instances trainingSet = new Instances("Rel", fvWekaAttribute, featureCount + 1);
    trainingSet.setClassIndex(featureCount);
    Attribute classAttribute = (Attribute) fvWekaAttribute.elementAt(featureCount);

    for (int i = 0; i < arrProcessFeature.size(); i++) {
        String[] names = arrProcessFeature.get(i).getProcessName().split("\\|");
        int sim = isNameFuzzyMatch(processNames, names);
        if (sim == -1) {
            continue;
        }
        List<String> featureVectors = arrProcessFeature.get(i).getFeatureVectors();
        for (String featureVector : featureVectors) {
            Instance trainInstance = new Instance(featureCount + 1);
            String[] attrValues = featureVector.split("\t");
            for (int k = 0; k < featureCount; k++) {
                trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(k),
                        Integer.parseInt(attrValues[k]));
            }
            trainInstance.setValue(classAttribute, processNames[sim]);
            trainingSet.add(trainInstance);
        }
    }

    Classifier cl = new NaiveBayes();
    cl.buildClassifier(trainingSet);

    Instance inst = new Instance(featureCount + 1);
    if (featureCount > 0) {
        // Tokenize once: the question does not change between features, so repeating this for
        // every feature was loop-invariant work.
        List<String> tokens = slem.tokenize(question);
        String[] tokArr = tokens.toArray(new String[tokens.size()]);
        for (int j = 0; j < featureCount; j++) {
            int freq = getFrequency(bowFeature.get(j), tokArr);
            inst.setValue((Attribute) fvWekaAttribute.elementAt(j), freq);
        }
    }

    inst.setDataset(trainingSet);
    int idxMax = ArrUtil.getIdxMax(cl.distributionForInstance(inst));
    return processNames[idxMax];
}

From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java

License:Open Source License

/**
 * Update the existing classifier with new instance. For online models, it directly updates. For offline learning models, it re-generates the model with updated training set
 *
 * @param instance/*from  w w w.j a v  a 2  s  .c o  m*/
 */
public void updateClassifier(TrainingInstance instance) throws NadeefClassifierException {
    // transform training instance into real instance
    Instance wekaInstance = new Instance(numberOfAttributes);
    wekaInstance.setDataset(instances);
    // add values from old tuple
    for (Cell cell : instance.getDirtyTuple().getCells()) {
        if (isPermitted(cell.getColumn())) {
            if (isPermitted(cell.getColumn())) {
                if (cell.getValue() instanceof String) {
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString());
                } else {
                    double doubleValue = Double.parseDouble(cell.getValue().toString());
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue);
                }
            }
        }
    }

    // add new value, check its type from dirty value
    if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) {
        wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue());
    } else {
        double doubleValue = Double.parseDouble(instance.getUpdatedValue());
    }
    // add similarity
    wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore());
    // add class label
    wekaInstance.setValue(numberOfAttributes - 1, instance.getLabel().toString());

    updateClassifier(wekaInstance);
}

From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java

License:Open Source License

/**
 * Get the prediction for a given instance based on the current model.
 *
 * The training instance is converted into a Weka instance: the permitted cells of the dirty
 * tuple are copied to their mapped attribute indexes, followed by the updated value (index
 * {@code numberOfAttributes - 3}) and the similarity score ({@code numberOfAttributes - 2}).
 * The class attribute (last index) is left unset.
 *
 * @param instance the instance to classify
 * @return the raw prediction array paired with the class attribute
 * @throws NadeefClassifierException declared for the delegated Weka-instance prediction
 */
public ClassificationResult getPrediction(TrainingInstance instance) throws NadeefClassifierException {
    // transform training instance into real instance
    Instance wekaInstance = new Instance(numberOfAttributes);
    wekaInstance.setDataset(instances);

    // add values from old tuple
    for (Cell cell : instance.getDirtyTuple().getCells()) {
        if (!isPermitted(cell.getColumn())) {
            continue;
        }
        if (cell.getValue() instanceof String) {
            wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString());
        } else {
            double doubleValue = Double.parseDouble(cell.getValue().toString());
            wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue);
        }
    }

    // add new value, check its type from the dirty value
    if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) {
        wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue());
    } else {
        // Previously the parsed number was discarded, so numeric updated values never
        // reached the model; store it like the numeric cells above.
        double doubleValue = Double.parseDouble(instance.getUpdatedValue());
        wekaInstance.setValue(numberOfAttributes - 3, doubleValue);
    }
    // add similarity
    wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore());

    double[] result = getPrediction(wekaInstance);
    // now convert this result into readable form
    return new ClassificationResult(result, wekaInstance.attribute(this.numberOfAttributes - 1));
}

From source file:qoala.arff.java

/**
 * Rebuilds the {@code vals} table, the attribute list and the {@code dataSet} from the
 * per-session figures exposed by {@code f}.
 *
 * Six numeric attributes are declared; for every session index one row of six values is
 * read from {@code f}, stored in {@code vals} and appended to {@code dataSet} as a dense
 * instance.
 *
 * @param f source of the per-session values
 */
public void valueSession(setData f) {
    vals = new double[cl.Datapoints.size() / 5][6];

    // Declare the attributes in column order.
    attributes = new FastVector();
    String[] attributeNames = {
        "att1 SessionID",
        "att2 NumberofClicks",
        "att3 NumberofQueriesInSession",
        "att4 NumberofQueryTermsInSession",
        "att5 NumberofAverageQueryTermsInSession",
        "att6 SessionDuration"
    };
    for (String attributeName : attributeNames) {
        attributes.addElement(new Attribute(attributeName));
    }

    // Start with an empty dataset and grow it row by row.
    dataSet = new Instances("My weka", attributes, 0);

    for (int k = 0; k < cl.Datapoints.get(cl.Datapoints.size() / 5).getSession(); k++) {
        double[] row = vals[k];
        row[0] = (double) f.GetSessionRetrieval(k);
        row[1] = (double) f.GetClick(k);
        row[2] = (double) f.GetSessionNumber(k);
        row[3] = (double) f.GetTerms(k);
        row[4] = (double) f.GetAverageTerms(k);
        row[5] = (double) f.sessionDurationRetrieval(k);

        Instance inst = new DenseInstance(6);
        for (int column = 0; column < row.length; column++) {
            inst.setValue(column, row[column]);
        }
        dataSet.add(inst);
    }
}

From source file:reactivetechnologies.sentigrade.dto.RequestData.java

License:Apache License

/**
 * /*from ww  w  . jav  a 2 s. c om*/
 * @param struct
 * @param t
 * @return
 */
protected Instance buildInstance(Instances struct, Tuple t) {
    Instance i = new DenseInstance(2);
    i.setDataset(struct);
    i.setValue(struct.attribute(ClassificationModelEngine.CLASSIFIER_ATTRIB_TEXT_IDX), t.getText());
    i.setValue(struct.attribute(ClassificationModelEngine.CLASSIFIER_ATTRIB_CLASS_IDX), t.getTextClass());
    return i;
}

From source file:Reader.Classifyer.java

/**
 * Converts a list of images into a dataset of instances.
 *
 * The dataset has {@code ATT_COUNT + 1} numeric attributes named "0", "1", ... with the last
 * one used as the class. Each image is converted with {@code imageToInstance} and its class
 * value is set to the image's position in the list divided by 24 (integer division).
 * NOTE(review): this presumably assumes 24 consecutive samples per letter - confirm against
 * the code that builds {@code images}.
 *
 * @param images the images to convert, in class order
 * @return the dataset holding one instance per image
 */
private static Instances ImageListToInstances(List<BufferedImage> images) {
    // One numeric attribute per feature plus the class attribute; names are the indexes.
    FastVector fvNominalVal = new FastVector(ATT_COUNT + 1);
    for (int x = 0; x < ATT_COUNT + 1; x++) {
        fvNominalVal.addElement(new Attribute(Integer.toString(x)));
    }

    Instances trainingData = new Instances("letters", fvNominalVal, 6500);
    trainingData.setClassIndex(ATT_COUNT);

    int letterCount = 0;
    for (BufferedImage image : images) {
        Instance temp = imageToInstance(image);
        try {
            // Integer division is intentional: it groups consecutive images into one class.
            temp.setValue(ATT_COUNT, letterCount++ / 24);
        } catch (Exception ex) {
            // Best effort: the instance is still added even when its class could not be set.
            System.out.println(ex + " Thrown in Classifying");
        }

        trainingData.add(temp);
    }

    return trainingData;
}

From source file:Reader.Classifyer.java

/**
 * Converts the image into an instance by taking the value of each pixel, the
 * number of black pixels in each row/col of the image, and the number of strokes
 * on each row/col.
 *
 * Attribute layout written by this method (height/width are those of the resized image):
 * indexes 0..512 hold scaled raw samples, 512..512+height-1 the row sums,
 * 512+height..512+height+width-1 the column sums, followed by height row-stroke
 * counts and width column-stroke counts.
 *
 * @param image - The image to convert
 * @return The resulting Instance
 */
private static Instance imageToInstance(BufferedImage image) {
    BufferedImage newImage = resize(image);
    int height = newImage.getHeight();
    int width = newImage.getWidth();
    // NOTE(review): gaps is never read or written again in this method.
    List<Integer> gaps = new ArrayList<>();
    WritableRaster raster = newImage.getRaster();

    // Fixed-size sample buffer for the whole resized image.
    // NOTE(review): 1537 presumably corresponds to 512 pixels x 3 bands (+1) - confirm
    // against resize(); getPixels fails if the raster yields more samples than this.
    int[] pixels = new int[1537];
    raster.getPixels(0, 0, width, height, pixels);

    Instance imageInstance = new Instance(ATT_COUNT + 1);

    // Raw sample features: integer-divide each of the first 513 samples by 250.
    // NOTE(review): the divisor here is 250 while the sums below use 255 - confirm.
    for (int i = 0; i < 513; i++) {
        imageInstance.setValue(i, pixels[i] / 250);
    }

    // Row sums. The first iteration writes index 512, overwriting the value stored there
    // by the loop above.
    // NOTE(review): indexing pixels as (i * width) + j treats the buffer as one sample per
    // pixel, whereas the stroke buffers below reserve three samples per pixel - confirm.
    int sum;
    for (int i = 0; i < height; i++) {
        sum = 0;
        for (int j = 0; j < width; j++) {
            sum += pixels[(i * width) + j];
        }

        imageInstance.setValue(i + 512, sum / 255);
    }

    // Column sums, stored directly after the row sums.
    for (int i = 0; i < width; i++) {
        sum = 0;
        for (int j = 0; j < height; j++) {
            sum += pixels[(j * width) + i];
        }

        imageInstance.setValue(i + height + 512, sum / 255);
    }

    // First attribute index after the row and column sums.
    int offset = 512 + height + width;

    // Extra weight added to the number of strokes because it is less likely
    // to be affected by noise in the image
    // Strokes across the x
    int[] rowPixels = new int[width * 3];
    for (int i = 0; i < height; i++) {
        raster.getPixels(0, i, width, 1, rowPixels);
        imageInstance.setValue(i + offset, getStrokes(rowPixels) * 20);
    }

    // Column stroke counts follow the height row stroke counts.
    offset += height;

    // Strokes across the y
    int[] colPixels = new int[height * 3];
    for (int i = 0; i < width; i++) {
        raster.getPixels(i, 0, 1, height, colPixels);
        imageInstance.setValue(i + offset, getStrokes(colPixels) * 20);
    }

    return imageInstance;
}

From source file:ru.ksu.niimm.cll.mocassin.crawl.analyzer.relation.impl.WekaBasedNavigationalRelationClassifier.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * The reference is described by the predicted classes of its two endpoints, the start and
 * end offset differences normalised by the document size, and one 0/1 flag per word
 * attribute of the training header telling whether the sentence tokens contain that word.
 * Returns {@code null} when the classifier fails.
 */
@Override
public Prediction predict(Reference reference, Graph<StructuralElement, Reference> graph) {
    StructuralElement source = graph.getSource(reference);
    MocassinOntologyClasses sourceType = source.getPredictedClass();
    StructuralElement target = graph.getDest(reference);
    MocassinOntologyClasses targetType = target.getPredictedClass();

    long documentSize = reference.getDocument().getSize();
    float startDistance = ((float) source.getGateStartOffset() - target.getGateStartOffset())
            / documentSize;
    float endDistance = ((float) source.getGateEndOffset() - target.getGateEndOffset())
            / documentSize;

    Instance instance = new Instance(trainingSetHeader.numAttributes());
    instance.setDataset(trainingSetHeader);
    instance.setValue(0, sourceType.toString());
    instance.setValue(1, targetType.toString());
    instance.setValue(2, startDistance);
    instance.setValue(3, endDistance);

    // Word attributes sit between the four fixed features and the final attribute.
    final int lastWordAttribute = trainingSetHeader.numAttributes() - 1;
    for (int index = 4; index < lastWordAttribute; index++) {
        String attributeName = trainingSetHeader.attribute(index).name();
        String word = attributeName.substring(attributeName.indexOf(ATTRIBUTE_NAME_DELIMITER) + 1);
        if (reference.getSentenceTokens().contains(word)) {
            instance.setValue(index, 1);
        } else {
            instance.setValue(index, 0);
        }
    }

    try {
        double[] distribution = classifier.distributionForInstance(instance);
        if (distribution[0] > distribution[1]) {
            return new Prediction(MocassinOntologyRelations.REFERS_TO, distribution[0]);
        }
        return new Prediction(MocassinOntologyRelations.DEPENDS_ON, distribution[1]);
    } catch (Exception e) {
        logger.error("Couldn't classify a reference with id='{}' in a document='{}'; null will be returned",
                format("%d/%s", reference.getId(), reference.getDocument().getUri()), e);
        return null;
    }
}