List of usage examples for weka.core.Instance.setValue
public void setValue(Attribute att, String value);
From source file:processes.ClusterProcess.java
private Instance toInstance(User user, Instances dataSet) { Instance tempInstance = new Instance(userPoints.numAttributes()); tempInstance.setDataset(userPoints); String userDataString = "0" + user.getTasteString(LastFMDataHandler.getInitialTagCount()); String[] dataArray = userDataString.split(","); for (int index = 0; index < dataArray.length; index++) { tempInstance.setValue(index, Integer.parseInt(dataArray[index])); }// w ww . j a v a 2 s . co m return tempInstance; }
From source file:put.semantic.fcanew.ml.WekaClassifier.java
public static Instance convert(Instance input, Instances src, Instances dst) { Instance result = new Instance(dst.numAttributes()); result.setDataset(dst);// w w w.ja v a 2 s . c o m for (int i = 0; i < dst.numAttributes(); ++i) { Attribute srcAttr = src.attribute(dst.attribute(i).name()); if (srcAttr.isNumeric()) { double val = input.value(srcAttr); result.setValue(i, val); } else { String val = input.stringValue(srcAttr); result.setValue(i, val); } } return result; }
From source file:qa.experiment.ProcessFeatureVector.java
public String trainAndPredict(String[] processNames, String question) throws Exception { FastVector fvWekaAttribute = generateWEKAFeatureVector(processNames); Instances trainingSet = new Instances("Rel", fvWekaAttribute, bowFeature.size() + 1); trainingSet.setClassIndex(bowFeature.size()); int cnt = 0;/*from ww w. j a v a2 s .c o m*/ for (int i = 0; i < arrProcessFeature.size(); i++) { String[] names = arrProcessFeature.get(i).getProcessName().split("\\|"); int sim = isNameFuzzyMatch(processNames, names); if (sim != -1) { // System.out.println("match " + arrProcessFeature.get(i).getProcessName()); ArrayList<String> featureVector = arrProcessFeature.get(i).getFeatureVectors(); for (int j = 0; j < featureVector.size(); j++) { Instance trainInstance = new Instance(bowFeature.size() + 1); String[] attrValues = featureVector.get(j).split("\t"); // System.out.println(trainInstance.numAttributes()); // System.out.println(fvWekaAttribute.size()); for (int k = 0; k < bowFeature.size(); k++) { trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(k), Integer.parseInt(attrValues[k])); } trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(bowFeature.size()), processNames[sim]); trainingSet.add(trainInstance); //System.out.println(cnt); cnt++; } } } Classifier cl = new NaiveBayes(); cl.buildClassifier(trainingSet); Instance inst = new Instance(bowFeature.size() + 1); //String[] tokenArr = tokens.toArray(new String[tokens.size()]); for (int j = 0; j < bowFeature.size(); j++) { List<String> tokens = slem.tokenize(question); String[] tokArr = tokens.toArray(new String[tokens.size()]); int freq = getFrequency(bowFeature.get(j), tokArr); inst.setValue((Attribute) fvWekaAttribute.elementAt(j), freq); } inst.setDataset(trainingSet); int idxMax = ArrUtil.getIdxMax(cl.distributionForInstance(inst)); return processNames[idxMax]; }
From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License:Open Source License
/** * Update the existing classifier with new instance. For online models, it directly updates. For offline learning models, it re-generates the model with updated training set * * @param instance/*from w w w.j a v a 2 s .c o m*/ */ public void updateClassifier(TrainingInstance instance) throws NadeefClassifierException { // transform training instance into real instance Instance wekaInstance = new Instance(numberOfAttributes); wekaInstance.setDataset(instances); // add values from old tuple for (Cell cell : instance.getDirtyTuple().getCells()) { if (isPermitted(cell.getColumn())) { if (isPermitted(cell.getColumn())) { if (cell.getValue() instanceof String) { wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString()); } else { double doubleValue = Double.parseDouble(cell.getValue().toString()); wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue); } } } } // add new value, check its type from dirty value if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) { wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue()); } else { double doubleValue = Double.parseDouble(instance.getUpdatedValue()); } // add similarity wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore()); // add class label wekaInstance.setValue(numberOfAttributes - 1, instance.getLabel().toString()); updateClassifier(wekaInstance); }
From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License:Open Source License
/** * Get Prediction for a given instance based on current model * * @param instance// www .ja v a 2 s . co m */ public ClassificationResult getPrediction(TrainingInstance instance) throws NadeefClassifierException { // transform training instance into real instance Instance wekaInstance = new Instance(numberOfAttributes); wekaInstance.setDataset(instances); // add values from old tuple for (Cell cell : instance.getDirtyTuple().getCells()) { if (isPermitted(cell.getColumn())) { if (cell.getValue() instanceof String) { wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString()); } else { double doubleValue = Double.parseDouble(cell.getValue().toString()); wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue); } } } // add new value, check its type from the dirty value if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) { wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue()); } else { double doubleValue = Double.parseDouble(instance.getUpdatedValue()); } // add similarity wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore()); double[] result = getPrediction(wekaInstance); // now convert this result into readable form ClassificationResult classificationResult = new ClassificationResult(result, wekaInstance.attribute(this.numberOfAttributes - 1)); return classificationResult; }
From source file:qoala.arff.java
public void valueSession(setData f) { vals = new double[cl.Datapoints.size() / 5][6]; //set up Attributes// attributes = new FastVector(); attributes.addElement(new Attribute("att1 SessionID")); attributes.addElement(new Attribute("att2 NumberofClicks")); attributes.addElement(new Attribute("att3 NumberofQueriesInSession")); attributes.addElement(new Attribute("att4 NumberofQueryTermsInSession")); attributes.addElement(new Attribute("att5 NumberofAverageQueryTermsInSession")); attributes.addElement(new Attribute("att6 SessionDuration")); //Creates instances object// dataSet = new Instances("My weka", attributes, 0); // fills the instances with data// for (int k = 0; k < cl.Datapoints.get(cl.Datapoints.size() / 5).getSession(); k++) { vals[k][0] = (double) f.GetSessionRetrieval(k); vals[k][1] = (double) f.GetClick(k); vals[k][2] = (double) f.GetSessionNumber(k); vals[k][3] = (double) f.GetTerms(k); vals[k][4] = (double) f.GetAverageTerms(k); vals[k][5] = (double) f.sessionDurationRetrieval(k); Instance inst = new DenseInstance(6); inst.setValue(0, vals[k][0]); inst.setValue(1, vals[k][1]);/*from ww w . j a v a 2 s . c om*/ inst.setValue(2, vals[k][2]); inst.setValue(3, vals[k][3]); inst.setValue(4, vals[k][4]); inst.setValue(5, vals[k][5]); dataSet.add(inst); } }
From source file:reactivetechnologies.sentigrade.dto.RequestData.java
License:Apache License
/** * /*from ww w . jav a 2 s. c om*/ * @param struct * @param t * @return */ protected Instance buildInstance(Instances struct, Tuple t) { Instance i = new DenseInstance(2); i.setDataset(struct); i.setValue(struct.attribute(ClassificationModelEngine.CLASSIFIER_ATTRIB_TEXT_IDX), t.getText()); i.setValue(struct.attribute(ClassificationModelEngine.CLASSIFIER_ATTRIB_CLASS_IDX), t.getTextClass()); return i; }
From source file:Reader.Classifyer.java
/**
 * Converts a list of images into instances.
 *
 * <p>The data set is named "letters" and has {@code ATT_COUNT + 1} numeric attributes named
 * "0" to {@code ATT_COUNT}; the last one is the class attribute. Each image is converted with
 * {@code imageToInstance} and its class value is set to its position in the list divided by 24
 * (integer division).
 *
 * <p>NOTE(review): this assumes the images arrive grouped in runs of 24 per class; confirm
 * against the caller. An unused vector that copied {@code letterVal} was removed here; if a
 * nominal class attribute was intended, that still needs to be wired in.
 *
 * @param images the images to convert, in class order
 * @return the resulting data set
 */
private static Instances ImageListToInstances(List<BufferedImage> images) {
    FastVector fvNominalVal = new FastVector(ATT_COUNT + 1);
    for (int x = 0; x < ATT_COUNT + 1; x++) {
        fvNominalVal.addElement(new Attribute(Integer.toString(x)));
    }

    Instances trainingData = new Instances("letters", fvNominalVal, 6500);
    trainingData.setClassIndex(ATT_COUNT);

    int letterCount = 0;
    for (BufferedImage image : images) {
        Instance temp = imageToInstance(image);
        try {
            temp.setValue(ATT_COUNT, letterCount++ / 24);
        } catch (Exception ex) {
            // Best effort: report the failure and still add the instance without a class value.
            System.out.println(ex + " Thrown in Classifying");
        }
        trainingData.add(temp);
    }
    return trainingData;
}
From source file:Reader.Classifyer.java
/** * Converts the image into an instance by taking the value of each pixel, the * number of black pixels in each row/col of the image, and the number of strokes * on each row/col.//ww w.j a v a2 s .c o m * @param image - The image to convert * @return The resulting Instance */ private static Instance imageToInstance(BufferedImage image) { BufferedImage newImage = resize(image); int height = newImage.getHeight(); int width = newImage.getWidth(); List<Integer> gaps = new ArrayList<>(); WritableRaster raster = newImage.getRaster(); int[] pixels = new int[1537]; raster.getPixels(0, 0, width, height, pixels); Instance imageInstance = new Instance(ATT_COUNT + 1); for (int i = 0; i < 513; i++) { imageInstance.setValue(i, pixels[i] / 250); } int sum; for (int i = 0; i < height; i++) { sum = 0; for (int j = 0; j < width; j++) { sum += pixels[(i * width) + j]; } imageInstance.setValue(i + 512, sum / 255); } for (int i = 0; i < width; i++) { sum = 0; for (int j = 0; j < height; j++) { sum += pixels[(j * width) + i]; } imageInstance.setValue(i + height + 512, sum / 255); } int offset = 512 + height + width; // Extra weight added to the number of strokes because it is less likely // to be effected by noise in the image // Strokes across the x int[] rowPixels = new int[width * 3]; for (int i = 0; i < height; i++) { raster.getPixels(0, i, width, 1, rowPixels); imageInstance.setValue(i + offset, getStrokes(rowPixels) * 20); } offset += height; // Strokes accross the y int[] colPixels = new int[height * 3]; for (int i = 0; i < width; i++) { raster.getPixels(i, 0, 1, height, colPixels); imageInstance.setValue(i + offset, getStrokes(colPixels) * 20); } return imageInstance; }
From source file:ru.ksu.niimm.cll.mocassin.crawl.analyzer.relation.impl.WekaBasedNavigationalRelationClassifier.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * <p>The instance handed to the classifier is laid out as: predicted class of the source
 * element, predicted class of the destination element, start-offset distance and end-offset
 * distance (both divided by the document size), followed by one 0/1 attribute per word that is
 * 1 when the reference's sentence tokens contain that word. The last attribute of
 * {@code trainingSetHeader} is left unset.
 *
 * <p>The relation with the larger of the first two distribution values is returned:
 * {@code REFERS_TO} when the first is strictly larger, {@code DEPENDS_ON} otherwise.
 *
 * @return the prediction, or {@code null} if the classifier throws
 */
@Override
public Prediction predict(Reference reference, Graph<StructuralElement, Reference> graph) {
    StructuralElement from = graph.getSource(reference);
    MocassinOntologyClasses fromType = from.getPredictedClass();
    StructuralElement to = graph.getDest(reference);
    MocassinOntologyClasses toType = to.getPredictedClass();

    long documentSize = reference.getDocument().getSize();
    float normalizedStartDistance =
            ((float) from.getGateStartOffset() - to.getGateStartOffset()) / documentSize;
    float normalizedEndDistance =
            ((float) from.getGateEndOffset() - to.getGateEndOffset()) / documentSize;

    Instance instance = new Instance(trainingSetHeader.numAttributes());
    instance.setDataset(trainingSetHeader);
    instance.setValue(0, fromType.toString());
    instance.setValue(1, toType.toString());
    instance.setValue(2, normalizedStartDistance);
    instance.setValue(3, normalizedEndDistance);

    // Word attributes: the word is the part of the attribute name after the delimiter.
    for (int i = 4; i < trainingSetHeader.numAttributes() - 1; i++) {
        String attrName = trainingSetHeader.attribute(i).name();
        String word = attrName.substring(attrName.indexOf(ATTRIBUTE_NAME_DELIMITER) + 1);
        instance.setValue(i, reference.getSentenceTokens().contains(word) ? 1 : 0);
    }

    try {
        double[] distribution = classifier.distributionForInstance(instance);
        if (distribution[0] > distribution[1]) {
            return new Prediction(MocassinOntologyRelations.REFERS_TO, distribution[0]);
        }
        return new Prediction(MocassinOntologyRelations.DEPENDS_ON, distribution[1]);
    } catch (Exception e) {
        // One argument per placeholder, with the exception last. The previous call supplied a
        // single combined "id/uri" string for two placeholders, so the document placeholder
        // never showed the document URI.
        logger.error("Couldn't classify a reference with id='{}' in a document='{}'; null will be returned",
                new Object[] { reference.getId(), reference.getDocument().getUri(), e });
        return null;
    }
}