List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
From source file:reactivetechnologies.sentigrade.dto.RequestData.java
License:Apache License
/**
 * Construct a 2-attribute text instance set, with the class attribute at last.
 *
 * @return the populated {@link Instances} built from this request's data set
 */
public Instances toInstances() {
    Assert.notEmpty(getDataSet(), "'dataSet' is empty or null");
    final Instances data = getStructure();
    for (Tuple tuple : getDataSet()) {
        // Skip tuples missing either the class label or the text body.
        if (StringUtils.isEmpty(tuple.textClass) || tuple.text == null) {
            continue;
        }
        data.add(buildInstance(data, tuple));
    }
    return data;
}
From source file:reactivetechnologies.sentigrade.dto.VectorRequestData.java
License:Apache License
/**
 * Transforms the data set to vectorized Weka instances via the analyzer's
 * instance builder, logging progress roughly every 10%.
 *
 * @return the populated {@link Instances}
 * @throws OperationFailedUnexpectedly if interrupted while polling built instances
 */
@Override
public Instances toInstances() {
    Assert.notEmpty(getDataSet(), "'dataSet' is empty or null");
    final Instances data = getStructure();
    BuildInstancesDelegate builder = analyzer.newInstancesBuilder();
    int count = 0;
    log.info("Start transforming to vector. This may take some time ..");
    long start = System.currentTimeMillis();
    for (Tuple t : getDataSet()) {
        // Skip tuples missing either the class label or the text body.
        if (StringUtils.isEmpty(t.textClass) || t.text == null)
            continue;
        builder.submitInstance(data, t);
        count++;
    }
    // FIX: when count < 10, pct is 0 and the original "i % pct" threw an
    // ArithmeticException (division by zero). Guard progress logging on pct > 0.
    int pct = count / 10;
    for (int i = 0; i < count; i++) {
        try {
            data.add(builder.pollInstance());
            if (pct > 0 && i > 0 && i % pct == 0)
                log.info("Processed " + (10 * (i / pct)) + "% ..");
        } catch (InterruptedException e) {
            // Restore the interrupt flag before surfacing the failure.
            Thread.currentThread().interrupt();
            throw new OperationFailedUnexpectedly(e);
        }
    }
    long time = System.currentTimeMillis() - start;
    log.info("End transformation. Time taken: " + ConfigUtil.toTimeElapsedString(time));
    return data;
}
From source file:Reader.Classifyer.java
/**
 * Converts a list of letter images into a Weka training set with one numeric
 * attribute per pixel and the class attribute at index ATT_COUNT.
 *
 * @param images the letter images, in class order
 * @return the populated training {@link Instances}
 */
private static Instances ImageListToInstances(List<BufferedImage> images) {
    // FIX: removed the "letters" FastVector built from letterVal — it was
    // populated but never used anywhere in this method (dead code).
    FastVector fvNominalVal = new FastVector(ATT_COUNT + 1);
    for (int x = 0; x < ATT_COUNT + 1; x++) {
        // primitive loop counter instead of the boxed Integer of the original
        fvNominalVal.addElement(new Attribute(Integer.toString(x)));
    }
    Instances trainingData = new Instances("letters", fvNominalVal, 6500);
    trainingData.setClassIndex(ATT_COUNT);
    int letterCount = 0;
    for (BufferedImage image : images) {
        Instance temp = imageToInstance(image);
        try {
            // Integer division: presumably the images arrive in runs of 24
            // per letter, so each group of 24 shares one class value — TODO confirm.
            temp.setValue(ATT_COUNT, letterCount++ / 24);
        } catch (Exception ex) {
            // Preserved best-effort behavior: log and keep the instance anyway.
            System.out.println(ex + " Thrown in Classifying");
        }
        trainingData.add(temp);
    }
    return trainingData;
}
From source file:se.de.hu_berlin.informatik.faultlocalizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
/**
 * Trains a Weka classifier on the observed execution traces, then ranks every
 * node by the predicted probability that a run covering only that node fails.
 *
 * @param localizer provides the nodes and traces to learn from
 * @param strategy  computation strategy (not consulted in this implementation)
 * @return a ranking of all nodes; higher score means more suspicious
 */
@Override
public SBFLRanking<T> localize(final ILocalizerCache<T> localizer, ComputationStrategies strategy) {
    // == 1. Create Weka training instance
    final List<INode<T>> nodes = new ArrayList<>(localizer.getNodes());
    // nominal true/false values shared by every attribute
    final List<String> tf = new ArrayList<>();
    tf.add("t");
    tf.add("f");
    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<>();
    final ArrayList<Attribute> attributeList = new ArrayList<>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }
    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);
    // create weka training instance; the success attribute (last) is the class
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);
    // == 2. add traces to training set
    // add an instance for each trace: per-node involvement flags plus outcome
    for (final ITrace<T> trace : localizer.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }
    // == 3. use prediction to localize faults
    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();
        Log.out(this, "begin classifying");
        int classified = 0;
        // One reusable query instance: all nodes "f" and outcome "f"; the loop
        // below toggles a single node to "t" at a time and resets it afterwards.
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");
        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }
            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");
            // predict with which probability this setup leads to a failing network;
            // index 1 of the distribution corresponds to class value "f" (failure)
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);
            // reset involvement for node so the next iteration starts clean
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}
From source file:se.de.hu_berlin.informatik.stardust.localizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
/**
 * Trains a Weka classifier on the spectra's execution traces, then ranks every
 * node by the predicted probability that a run covering only that node fails.
 *
 * @param spectra provides the nodes and traces to learn from
 * @return a ranking of all nodes; higher score means more suspicious
 */
@Override
public SBFLRanking<T> localize(final ISpectra<T> spectra) {
    // == 1. Create Weka training instance
    final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes());
    // nominal true/false values shared by every attribute
    final List<String> tf = new ArrayList<String>();
    tf.add("t");
    tf.add("f");
    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }
    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);
    // create weka training instance; the success attribute (last) is the class
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);
    // == 2. add traces to training set
    // add an instance for each trace: per-node involvement flags plus outcome
    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }
    // == 3. use prediction to localize faults
    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();
        Log.out(this, "begin classifying");
        int classified = 0;
        // One reusable query instance: all nodes "f" and outcome "f"; the loop
        // below toggles a single node to "t" at a time and resets it afterwards.
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");
        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }
            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");
            // predict with which probability this setup leads to a failing network;
            // index 1 of the distribution corresponds to class value "f" (failure)
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);
            // reset involvement for node so the next iteration starts clean
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}
From source file:semana07.IrisKnn.java
/**
 * Loads the iris ARFF data, trains a 1-NN and a 3-NN classifier on a
 * two-thirds split, classifies the held-out third with both, and writes
 * the real class plus both predictions to iris.csv and the console.
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    // DEFINE THE TRAINING SET
    // - Reader for the ARFF file
    FileReader baseIris = new FileReader("iris.arff");
    // - Build the instance set from the file
    Instances iris = new Instances(baseIris);
    // - The class attribute is at index 4
    iris.setClassIndex(4);
    // Shuffle, then take fold 0 of a 3-fold split: 2/3 train, 1/3 test
    iris = iris.resample(new Debug.Random());
    Instances irisTreino = iris.trainCV(3, 0);
    Instances irisTeste = iris.testCV(3, 0);
    // DEFINE AN UNKNOWN EXAMPLE
    // 5.9,3.0,5.1,1.8,Iris-virginica
    Instance irisInst = new DenseInstance(iris.numAttributes());
    irisInst.setDataset(iris);
    irisInst.setValue(0, 5.9);
    irisInst.setValue(1, 3.0);
    irisInst.setValue(2, 5.1);
    irisInst.setValue(3, 1.8);
    // DEFINE THE CLASSIFICATION ALGORITHMS
    // NN (single nearest neighbor)
    IBk vizinhoIris = new IBk();
    // kNN (3 nearest neighbors)
    IBk knnIris = new IBk(3);
    // TRAIN THE CLASSIFIERS
    // NN
    vizinhoIris.buildClassifier(irisTreino);
    // kNN
    knnIris.buildClassifier(irisTreino);
    // Output file to be written
    FileWriter writer = new FileWriter("iris.csv");
    // Write the CSV header
    writer.append("Classe Real;Resultado NN;Resultado kNN");
    writer.append(System.lineSeparator());
    // Console header
    System.out.println("Classe Real;Resultado NN;Resultado kNN");
    for (int i = 0; i <= irisTeste.numInstances() - 1; i++) {
        Instance testeIris = irisTeste.instance(i);
        // Console output of the original class value
        System.out.print(testeIris.stringValue(4) + ";");
        // Write the original class value to the file
        writer.append(testeIris.stringValue(4) + ";");
        // Mark the class attribute as unknown before classifying
        testeIris.setClassMissing();
        // CLASSIFY THE INSTANCE
        // NN
        double respostaVizinho = vizinhoIris.classifyInstance(testeIris);
        testeIris.setValue(4, respostaVizinho);
        String stringVizinho = testeIris.stringValue(4);
        // kNN — assign the answer to the class attribute (index 4)
        double respostaKnn = knnIris.classifyInstance(testeIris);
        testeIris.setValue(4, respostaKnn);
        String stringKnn = testeIris.stringValue(4);
        // NOTE(review): this adds the SAME unknown example (irisInst) to "iris"
        // on every loop iteration — not the classified test instance. Looks like
        // leftover code from an earlier version; confirm the intent.
        iris.add(irisInst);
        // Write both predictions to iris.csv
        writer.append(stringVizinho + ";");
        writer.append(stringKnn + ";");
        writer.append(System.lineSeparator());
        // Console output of both predictions
        System.out.print(respostaVizinho + ";");
        System.out.print(respostaKnn + ";");
        System.out.println(testeIris.stringValue(4));
    }
    writer.flush();
    writer.close();
}
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License:Open Source License
/**
 * Builds one binary classifier per class (one-vs-rest), with degenerate
 * handling for one class (no classifier needed) and two classes (a single
 * binary classifier on the filtered instances suffices).
 *
 * @param p_Instances the training instances
 * @throws Exception if no base classifier is set or training fails
 */
@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }
    // ZeroR model trained on the full data, kept as a default/fallback
    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);
    this.m_ClassAttribute = p_Instances.classAttribute();
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();
    switch (numClassifiers) {
    case 1:
        // only one class: nothing to discriminate
        this.m_Classifiers = null;
        break;
    case 2:
        // binary problem: a single classifier over the filtered data
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        // one-vs-rest: one copy of the base classifier per class
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        // When an ID attribute is configured, record every class value seen
        // per instance id (genInstances uses this to skip conflicting negatives).
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        // Train each per-class classifier on its generated one-vs-rest set.
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License:Open Source License
/**
 * Generates the one-vs-rest training set for classifier classIdx: the class
 * value becomes 1 for instances of the target class and 0 otherwise. When an
 * id-to-classes map is supplied, a negative instance whose id also carries
 * the target class is skipped entirely.
 *
 * @param p_Instances  input instances
 * @param p_ClassIndex class index treated as the positive class
 * @param p_ID2Classes class values recorded per instance id (may be null)
 * @return new instances
 */
protected Instances genInstances(Instances p_Instances, double p_ClassIndex,
        Hashtable<String, ArrayList<Double>> p_ID2Classes) {
    Instances newInsts = new Instances(this.m_OutputFormat, 0);
    for (int i = 0; i < p_Instances.numInstances(); i++) {
        Instance inst = p_Instances.instance(i);
        // Copy the instance, preserving sparseness of the source representation.
        Instance newInst = null;
        if (SparseInstance.class.isInstance(inst)) {
            newInst = new SparseInstance(inst);
        } else {
            newInst = new Instance(inst);
        }
        if (newInst.value(p_Instances.classIndex()) == p_ClassIndex) {
            // positive example for this one-vs-rest classifier
            newInst.setValue(inst.classIndex(), 1);
        } else {
            // FIX: Double.valueOf replaces the deprecated "new Double(...)"
            // constructor; contains() compares via equals(), so behavior is identical.
            // NOTE(review): assumes every id seen here has an entry in
            // p_ID2Classes (true when called from buildClassifier) — otherwise
            // get(...) returns null and this would NPE; confirm for other callers.
            if (p_ID2Classes == null || !p_ID2Classes.get(inst.stringValue(this.m_IndexOfID))
                    .contains(Double.valueOf(p_ClassIndex))) {
                newInst.setValue(inst.classIndex(), 0);
            } else {
                // the same id is also labeled with the target class: skip it
                continue;
            }
        }
        newInst.deleteAttributeAt(this.m_IndexOfID);
        newInst.setDataset(newInsts);
        newInsts.add(newInst);
    }
    return newInsts;
}
From source file:sg.edu.nus.comp.nlp.ims.io.CWekaLexeltWriter.java
License:Open Source License
/**
 * Converts a lexelt into a dense Weka {@link Instances} object: a string #ID
 * attribute first, one attribute per statistic key, and a nominal #TAG class
 * attribute last. Instances with multiple tags yield one row per tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the populated {@code Instances}
 * @throws ClassNotFoundException if a feature type name cannot be resolved
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
    String relation = p_Lexelt.getID();
    FastVector attributes = new FastVector();
    int capacity = p_Lexelt.size();
    IStatistic stat = p_Lexelt.getStatistic();
    // attribute 0: free-form string id
    Attribute ids = new Attribute("#ID");
    attributes.addElement(ids);
    // one attribute per key: numeric features get a numeric attribute,
    // everything else a nominal attribute over the observed values
    int keySize = stat.getKeys().size();
    for (int keyIdx = 0; keyIdx < keySize; keyIdx++) {
        String key = stat.getKey(keyIdx);
        String type = stat.getType(keyIdx);
        if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) {
            attributes.addElement(new Attribute(key));
        } else {
            FastVector attributeValues = new FastVector();
            List<String> values = stat.getValue(keyIdx);
            for (String value : values) {
                attributeValues.addElement(value);
            }
            if (attributeValues.size() == 0) {
                throw new IllegalStateException("No attribute specified.");
            }
            attributes.addElement(new Attribute(key, attributeValues));
        }
    }
    // last attribute: the sense tag (class attribute by convention)
    FastVector attributeValues = new FastVector();
    for (String tag : stat.getTags()) {
        attributeValues.addElement(tag);
    }
    attributes.addElement(new Attribute("#TAG", attributeValues));
    Instances instances = new Instances(relation, attributes, capacity);
    for (int instIdx = 0; instIdx < p_Lexelt.size(); instIdx++) {
        IInstance instance = p_Lexelt.getInstance(instIdx);
        int keyIdx = 0;
        double value;
        IFeature feature;
        int featureSize = instance.size();
        // features: attribute index (keyIdx + 1, offset past #ID) -> value
        // exist: sorted list of attribute indices this instance actually sets
        Hashtable<Integer, Double> features = new Hashtable<Integer, Double>();
        ArrayList<Integer> exist = new ArrayList<Integer>();
        for (int featIdx = 0; featIdx < featureSize; featIdx++) {
            feature = instance.getFeature(featIdx);
            keyIdx = stat.getIndex(feature.getKey());
            if (keyIdx < 0) {
                continue;
            }
            if (ANumericFeature.class.isInstance(feature)) {
                value = Double.parseDouble(feature.getValue());
            } else if (ABinaryFeature.class.isInstance(feature)) {
                value = instances.attribute(keyIdx + 1).indexOfValue(feature.getValue());
            } else {
                // unseen nominal values fall back to the statistic's default
                String fv = feature.getValue();
                if (fv == null || !stat.contains(keyIdx, fv)) {
                    fv = stat.getDefaultValue();
                }
                value = instances.attribute(keyIdx + 1).indexOfValue(fv);
            }
            features.put(keyIdx + 1, value);
            exist.add(keyIdx + 1);
        }
        // sentinel for the tag attribute so the gap-filling loop covers all keys
        exist.add(keySize + 1);
        Collections.sort(exist);
        double[] attValues = new double[keySize + 2];
        ids.addStringValue(instance.getID());
        attValues[0] = ids.indexOfValue(instance.getID());
        // Walk the sorted set indices; for every gap between them fill the
        // unset attributes with 0 (numeric) or the index of nominal value "0".
        int begin, end = -1;
        for (int valueIdx = 0; valueIdx < exist.size(); valueIdx++) {
            begin = end + 1;
            end = exist.get(valueIdx);
            for (int i = begin; i < end; i++) {
                if (instances.attribute(i).isNumeric()) {
                    attValues[i] = 0;
                } else {
                    attValues[i] = instances.attribute(i).indexOfValue("0");
                }
            }
            // the sentinel (keySize + 1) has no feature value to copy
            if (end <= keySize) {
                attValues[end] = features.get(end);
            }
        }
        // one output row per tag; "?" means the tag is missing
        for (String tag : instance.getTag()) {
            if (tag.equals("'?'") || tag.equals("?")) {
                attValues[keySize + 1] = Instance.missingValue();
            } else {
                attValues[keySize + 1] = instances.attribute(keySize + 1).indexOfValue(tag);
            }
            Instance ins = new Instance(1, attValues);
            instances.add(ins);
        }
        // untagged instance: emit a single row with a missing tag
        if (instance.getTag().size() == 0) {
            attValues[keySize + 1] = Instance.missingValue();
            Instance ins = new Instance(1, attValues);
            instances.add(ins);
        }
    }
    return instances;
}
From source file:sg.edu.nus.comp.nlp.ims.io.CWekaSparseLexeltWriter.java
License:Open Source License
/**
 * Converts a lexelt into sparse Weka instances: a string #ID attribute first,
 * one attribute per statistic key, and a nominal #TAG class attribute last.
 * Only the attributes an instance actually sets are stored (SparseInstance).
 * Instances with multiple tags yield one row per tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the populated {@code Instances}
 * @throws ClassNotFoundException if a feature type name cannot be resolved
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
    String relation = p_Lexelt.getID();
    FastVector attributes = new FastVector();
    int capacity = p_Lexelt.size();
    IStatistic stat = p_Lexelt.getStatistic();
    // attribute 0: free-form string id
    Attribute ids = new Attribute("#ID");
    attributes.addElement(ids);
    // one attribute per key: numeric features get a numeric attribute,
    // everything else a nominal attribute over the observed values
    int keySize = stat.getKeys().size();
    for (int keyIdx = 0; keyIdx < keySize; keyIdx++) {
        String key = stat.getKey(keyIdx);
        String type = stat.getType(keyIdx);
        if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) {
            attributes.addElement(new Attribute(key));
        } else {
            FastVector attributeValues = new FastVector();
            List<String> values = stat.getValue(keyIdx);
            for (String value : values) {
                attributeValues.addElement(value);
            }
            if (attributeValues.size() == 0) {
                throw new IllegalStateException("No attribute specified.");
            }
            attributes.addElement(new Attribute(key, attributeValues));
        }
    }
    // last attribute: the sense tag (class attribute by convention)
    FastVector attributeValues = new FastVector();
    for (String tag : stat.getTags()) {
        attributeValues.addElement(tag);
    }
    attributes.addElement(new Attribute("#TAG", attributeValues));
    Instances instances = new Instances(relation, attributes, capacity);
    for (int instIdx = 0; instIdx < p_Lexelt.size(); instIdx++) {
        IInstance instance = p_Lexelt.getInstance(instIdx);
        int keyIdx = 0;
        double value;
        IFeature feature;
        int featureSize = instance.size();
        // features: attribute index (keyIdx + 1, offset past #ID) -> value
        // exist: attribute indices this instance actually sets
        Hashtable<Integer, Double> features = new Hashtable<Integer, Double>();
        ArrayList<Integer> exist = new ArrayList<Integer>();
        for (int featIdx = 0; featIdx < featureSize; featIdx++) {
            feature = instance.getFeature(featIdx);
            keyIdx = stat.getIndex(feature.getKey());
            if (keyIdx < 0) {
                continue;
            }
            if (ANumericFeature.class.isInstance(feature)) {
                value = Double.parseDouble(feature.getValue());
            } else if (ABinaryFeature.class.isInstance(feature)) {
                value = instances.attribute(keyIdx + 1).indexOfValue(feature.getValue());
            } else {
                // unseen nominal values fall back to the statistic's default
                String fv = feature.getValue();
                if (fv == null || !stat.contains(keyIdx, fv)) {
                    fv = stat.getDefaultValue();
                }
                value = instances.attribute(keyIdx + 1).indexOfValue(fv);
            }
            features.put(keyIdx + 1, value);
            exist.add(keyIdx + 1);
        }
        Collections.sort(exist);
        // sparse representation: slot 0 is #ID, slots 1..exist.size() are the
        // set attributes, the last slot is the #TAG class attribute
        double[] attrValues = new double[exist.size() + 2];
        int[] indices = new int[exist.size() + 2];
        ids.addStringValue(instance.getID());
        attrValues[0] = ids.indexOfValue(instance.getID());
        indices[0] = 0;
        for (int valueIdx = 0; valueIdx < exist.size(); valueIdx++) {
            indices[valueIdx + 1] = exist.get(valueIdx);
            attrValues[valueIdx + 1] = features.get(indices[valueIdx + 1]);
        }
        Attribute tags = instances.attribute(keySize + 1);
        indices[exist.size() + 1] = keySize + 1;
        // one output row per tag; "?" means the tag is missing
        for (String tag : instance.getTag()) {
            if (tag.equals("'?'") || tag.equals("?")) {
                attrValues[exist.size() + 1] = Instance.missingValue();
            } else {
                attrValues[exist.size() + 1] = tags.indexOfValue(tag);
            }
            SparseInstance ins = new SparseInstance(1, attrValues, indices, keySize + 2);
            instances.add(ins);
        }
        // untagged instance: emit a single row with a missing tag
        if (instance.getTag().size() == 0) {
            attrValues[exist.size() + 1] = Instance.missingValue();
            SparseInstance ins = new SparseInstance(1, attrValues, indices, keySize + 2);
            instances.add(ins);
        }
    }
    return instances;
}