List of usage examples for weka.core Instance setDataset
public void setDataset(Instances instances);
From source file: qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License: Open Source License
/** * Get Prediction for a given instance based on current model * * @param instance// w w w. j av a2 s. c om */ public ClassificationResult getPrediction(TrainingInstance instance) throws NadeefClassifierException { // transform training instance into real instance Instance wekaInstance = new Instance(numberOfAttributes); wekaInstance.setDataset(instances); // add values from old tuple for (Cell cell : instance.getDirtyTuple().getCells()) { if (isPermitted(cell.getColumn())) { if (cell.getValue() instanceof String) { wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString()); } else { double doubleValue = Double.parseDouble(cell.getValue().toString()); wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue); } } } // add new value, check its type from the dirty value if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) { wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue()); } else { double doubleValue = Double.parseDouble(instance.getUpdatedValue()); } // add similarity wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore()); double[] result = getPrediction(wekaInstance); // now convert this result into readable form ClassificationResult classificationResult = new ClassificationResult(result, wekaInstance.attribute(this.numberOfAttributes - 1)); return classificationResult; }
From source file: qa.qcri.nadeef.core.utils.classification.RandomForestClassifier.java
License: Open Source License
/**
 * Incrementally updates the model with a newly labeled instance by appending
 * it to the training data and rebuilding the random forest from scratch.
 *
 * @param instance the new training instance
 * @throws NadeefClassifierException if Weka fails to rebuild the model
 */
protected void updateClassifier(Instance instance) throws NadeefClassifierException {
    // Attach the dataset BEFORE adding: Instances.add stores a copy of the
    // instance, so calling setDataset afterwards only affected the caller's
    // local reference, not the stored copy.
    instance.setDataset(instances);
    instances.add(instance);
    try {
        classifier.buildClassifier(instances);
    } catch (Exception e) {
        throw new NadeefClassifierException("RandomForest cannot be built with new instance", e);
    }
}
From source file: reactivetechnologies.sentigrade.dto.RequestData.java
License: Apache License
/**
 * Builds a dense two-attribute Weka instance for the given tuple, attached
 * to the supplied dataset structure (header).
 *
 * @param struct dataset header defining the text and class attributes
 * @param t      tuple carrying the text and its class label
 * @return a new instance bound to {@code struct}
 */
protected Instance buildInstance(Instances struct, Tuple t) {
    final Instance inst = new DenseInstance(2);
    inst.setDataset(struct);
    // Fill the class label and the text content of the tuple.
    inst.setValue(struct.attribute(ClassificationModelEngine.CLASSIFIER_ATTRIB_CLASS_IDX), t.getTextClass());
    inst.setValue(struct.attribute(ClassificationModelEngine.CLASSIFIER_ATTRIB_TEXT_IDX), t.getText());
    return inst;
}
From source file: ru.ksu.niimm.cll.mocassin.crawl.analyzer.relation.impl.WekaBasedNavigationalRelationClassifier.java
License: Open Source License
/**
 * {@inheritDoc}
 * <p>
 * Builds a feature vector for the reference (source/target element types,
 * normalized offset distances, and bag-of-words features over the sentence
 * tokens) and classifies it as either REFERS_TO or DEPENDS_ON.
 */
@Override
public Prediction predict(Reference reference, Graph<StructuralElement, Reference> graph) {
    StructuralElement from = graph.getSource(reference);
    MocassinOntologyClasses fromType = from.getPredictedClass();
    StructuralElement to = graph.getDest(reference);
    MocassinOntologyClasses toType = to.getPredictedClass();
    long documentSize = reference.getDocument().getSize();
    // Offset distances between the two elements, normalized by document size.
    float normalizedStartDistance = ((float) from.getGateStartOffset() - to.getGateStartOffset()) / documentSize;
    float normalizedEndDistance = ((float) from.getGateEndOffset() - to.getGateEndOffset()) / documentSize;
    // The instance must share the training set's header so attribute indices line up.
    Instance instance = new Instance(trainingSetHeader.numAttributes());
    instance.setDataset(trainingSetHeader);
    instance.setValue(0, fromType.toString());
    instance.setValue(1, toType.toString());
    instance.setValue(2, normalizedStartDistance);
    instance.setValue(3, normalizedEndDistance);
    // Bag-of-words attributes (4 .. n-2): 1 if the word appears among the
    // reference's sentence tokens, 0 otherwise. The word is the part of the
    // attribute name after the delimiter.
    for (int i = 4; i < trainingSetHeader.numAttributes() - 1; i++) {
        String attrName = trainingSetHeader.attribute(i).name();
        String word = attrName.substring(attrName.indexOf(ATTRIBUTE_NAME_DELIMITER) + 1);
        instance.setValue(i, reference.getSentenceTokens().contains(word) ? 1 : 0);
    }
    try {
        double[] distribution = classifier.distributionForInstance(instance);
        Prediction prediction;
        // Index 0 maps to REFERS_TO, index 1 to DEPENDS_ON; keep the more probable one.
        if (distribution[0] > distribution[1]) {
            prediction = new Prediction(MocassinOntologyRelations.REFERS_TO, distribution[0]);
        } else {
            prediction = new Prediction(MocassinOntologyRelations.DEPENDS_ON, distribution[1]);
        }
        return prediction;
    } catch (Exception e) {
        // Classification failure is logged, not propagated; callers must handle null.
        logger.error("Couldn't classify a reference with id='{}' in a document='{}'; null will be returned",
                format("%d/%s", reference.getId(), reference.getDocument().getUri()), e);
        return null;
    }
}
From source file: se.de.hu_berlin.informatik.faultlocalizer.machinelearn.WekaFaultLocalizer.java
License: Open Source License
/**
 * Localizes faults by training a classifier on execution traces and then
 * querying it once per node: each query describes a hypothetical run in which
 * only that node is involved, and the node is ranked by the predicted
 * probability of failure.
 */
@Override
public SBFLRanking<T> localize(final ILocalizerCache<T> localizer, ComputationStrategies strategy) {
    // == 1. Create Weka training instance
    final List<INode<T>> nodes = new ArrayList<>(localizer.getNodes());

    // nominal true/false values
    final List<String> tf = new ArrayList<>();
    tf.add("t");
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<>();
    final ArrayList<Attribute> attributeList = new ArrayList<>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set
    // add an instance for each trace: one "t"/"f" value per node (involvement)
    // plus the trace outcome as the class value
    for (final ITrace<T> trace : localizer.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults
    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions, trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();

        Log.out(this, "begin classifying");
        int classified = 0;

        // Reusable probe instance: initially no node involved, outcome "f".
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }

            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");
            // predict with which probability this setup leads to a failing network;
            // distribution[1] is the probability of class value "f" (failure),
            // since "f" was added second to the nominal value list above
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);

            // reset involvment for node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}
From source file: se.de.hu_berlin.informatik.stardust.localizer.machinelearn.WekaFaultLocalizer.java
License: Open Source License
/**
 * Localizes faults from a program spectrum: trains a classifier on the
 * spectrum's traces, then queries it once per node with a hypothetical run in
 * which only that node is involved, ranking the node by the predicted
 * probability of failure.
 */
@Override
public SBFLRanking<T> localize(final ISpectra<T> spectra) {
    // == 1. Create Weka training instance
    final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes());

    // nominal true/false values
    final List<String> tf = new ArrayList<String>();
    tf.add("t");
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set
    // add an instance for each trace: one "t"/"f" value per node (involvement)
    // plus the trace outcome as the class value
    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults
    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions, trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();

        Log.out(this, "begin classifying");
        int classified = 0;

        // Reusable probe instance: initially no node involved, outcome "f".
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }

            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");
            // predict with which probability this setup leads to a failing network;
            // distribution[1] is the probability of class value "f" (failure),
            // since "f" was added second to the nominal value list above
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);

            // reset involvment for node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}
From source file: semana07.IrisKnn.java
public static void main(String[] args) throws FileNotFoundException, IOException, Exception { // DEFININDO CONJUNTO DE TREINAMENTO // - Definindo o leitor do arquivo arff FileReader baseIris = new FileReader("iris.arff"); // - Definindo o grupo de instancias a partir do arquivo "simpsons.arff" Instances iris = new Instances(baseIris); // - Definindo o indice do atributo classe iris.setClassIndex(4);// www. j a v a 2 s .c o m iris = iris.resample(new Debug.Random()); Instances irisTreino = iris.trainCV(3, 0); Instances irisTeste = iris.testCV(3, 0); // DEFININDO EXEMPLO DESCONHECIDO //5.9,3.0,5.1,1.8,Iris-virginica Instance irisInst = new DenseInstance(iris.numAttributes()); irisInst.setDataset(iris); irisInst.setValue(0, 5.9); irisInst.setValue(1, 3.0); irisInst.setValue(2, 5.1); irisInst.setValue(3, 1.8); // DEFININDO ALGORITMO DE CLASSIFICAO //NN IBk vizinhoIris = new IBk(); //kNN IBk knnIris = new IBk(3); // MONTANDO CLASSIFICADOR //NN vizinhoIris.buildClassifier(irisTreino); //kNN knnIris.buildClassifier(irisTreino); // Definindo arquivo a ser escrito FileWriter writer = new FileWriter("iris.csv"); // Escrevendo o cabealho do arquivo writer.append("Classe Real;Resultado NN;Resultado kNN"); writer.append(System.lineSeparator()); // Sada CLI / Console System.out.println("Classe Real;Resultado NN;Resultado kNN"); //Cabealho for (int i = 0; i <= irisTeste.numInstances() - 1; i++) { Instance testeIris = irisTeste.instance(i); // Sada CLI / Console do valor original System.out.print(testeIris.stringValue(4) + ";"); // Escrevendo o valor original no arquivo writer.append(testeIris.stringValue(4) + ";"); // Definindo o atributo classe como indefinido testeIris.setClassMissing(); // CLASSIFICANDO A INSTANCIA // NN double respostaVizinho = vizinhoIris.classifyInstance(testeIris); testeIris.setValue(4, respostaVizinho); String stringVizinho = testeIris.stringValue(4); //kNN double respostaKnn = knnIris.classifyInstance(testeIris); // Atribuindo respota ao valor do 
atributo do index 4(classe) testeIris.setValue(4, respostaKnn); String stringKnn = testeIris.stringValue(4); // Adicionando resultado ao grupo de instancia iris iris.add(irisInst); //Escrevendo os resultados no arquivo iris.csv writer.append(stringVizinho + ";"); writer.append(stringKnn + ";"); writer.append(System.lineSeparator()); // Exibindo via CLI / Console o resultado System.out.print(respostaVizinho + ";"); System.out.print(respostaKnn + ";"); System.out.println(testeIris.stringValue(4)); } writer.flush(); writer.close(); }
From source file: sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License: Open Source License
/**
 * Returns the class-probability distribution for the given instance by
 * combining the outputs of the per-class (one-vs-rest) binary classifiers.
 *
 * @param p_Instance the instance to classify
 * @return probability distribution over the class values
 * @throws Exception if a base classifier fails
 */
@Override
public double[] distributionForInstance(Instance p_Instance) throws Exception {
    double[] probs = new double[p_Instance.numClasses()];
    // Map the instance into the internal output format and hide its class.
    Instance newInst = this.filterInstance(p_Instance);
    newInst.setDataset(this.m_OutputFormat);
    newInst.setMissing(newInst.classAttribute());
    if (this.m_Classifiers == null) {
        // Degenerate single-class problem: the only class has probability 1.
        return new double[] { 1 };
    }
    if (this.m_Classifiers.length == 1) {
        // Binary problem handled directly by a single base classifier.
        return this.m_Classifiers[0].distributionForInstance(newInst);
    }
    // One-vs-rest voting: classifier i contributes its "positive" probability
    // (index 1) to class i and its "negative" probability (index 0) to all others.
    for (int i = 0; i < this.m_Classifiers.length; i++) {
        if (this.m_Classifiers[i] != null) {
            double[] current = this.m_Classifiers[i].distributionForInstance(newInst);
            for (int j = 0; j < this.m_ClassAttribute.numValues(); j++) {
                if (j == i) {
                    probs[j] += current[1];
                } else {
                    probs[j] += current[0];
                }
            }
        }
    }
    if (Utils.gr(Utils.sum(probs), 0)) {
        Utils.normalize(probs);
        return probs;
    } else {
        // No probability mass accumulated; fall back to the ZeroR baseline.
        return m_ZeroR.distributionForInstance(newInst);
    }
}
From source file: sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License: Open Source License
/**
 * Trains the multi-class SVM wrapper: for 1 class no classifier is needed,
 * for 2 classes a single base classifier is trained, and for more classes one
 * one-vs-rest base classifier is trained per class value.
 *
 * @param p_Instances the training instances
 * @throws Exception if no base classifier is set or training fails
 */
@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }
    // ZeroR baseline used as a fallback at prediction time.
    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);
    this.m_ClassAttribute = p_Instances.classAttribute();
    // Derive the internal output format from the input header.
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();
    switch (numClassifiers) {
    case 1:
        // Single class: nothing to discriminate.
        this.m_Classifiers = null;
        break;
    case 2:
        // Binary problem: a single base classifier suffices.
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        // One-vs-rest: one copy of the base classifier per class value.
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        // When an ID attribute is configured, collect all class values seen per
        // instance ID so genInstances can skip conflicting negatives.
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}
From source file: sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License: Open Source License
/**
 * Generates the binary (one-vs-rest) training set for the classifier of the
 * given class index: instances of that class become positive examples (class
 * value 1), all others become negatives (class value 0), except instances
 * whose ID is also associated with the target class, which are skipped.
 *
 * @param p_Instances  input instances
 * @param p_ClassIndex class index this binary classifier discriminates
 * @param p_ID2Classes map from instance ID to all class values seen for that
 *                     ID, or null when no ID attribute is configured
 * @return new binary-labeled instances in the internal output format
 */
protected Instances genInstances(Instances p_Instances, double p_ClassIndex,
        Hashtable<String, ArrayList<Double>> p_ID2Classes) {
    Instances newInsts = new Instances(this.m_OutputFormat, 0);
    for (int i = 0; i < p_Instances.numInstances(); i++) {
        Instance inst = p_Instances.instance(i);
        // Copy the instance, preserving a sparse representation when present.
        Instance newInst = null;
        if (SparseInstance.class.isInstance(inst)) {
            newInst = new SparseInstance(inst);
        } else {
            newInst = new Instance(inst);
        }
        if (newInst.value(p_Instances.classIndex()) == p_ClassIndex) {
            // Positive example for this one-vs-rest classifier.
            newInst.setValue(inst.classIndex(), 1);
        } else {
            // Negative example, unless some instance sharing this ID carries the
            // target class — then skip it to avoid a contradictory label.
            // (Double.valueOf replaces the deprecated new Double(...) constructor.)
            if (p_ID2Classes == null || !p_ID2Classes.get(inst.stringValue(this.m_IndexOfID))
                    .contains(Double.valueOf(p_ClassIndex))) {
                newInst.setValue(inst.classIndex(), 0);
            } else {
                continue;
            }
        }
        // Drop the ID attribute; it is not a learning feature.
        newInst.deleteAttributeAt(this.m_IndexOfID);
        newInst.setDataset(newInsts);
        newInsts.add(newInst);
    }
    return newInsts;
}