List of usage examples for weka.core Instance setValue
public void setValue(Attribute att, String value);
From source file:se.de.hu_berlin.informatik.faultlocalizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
@Override public SBFLRanking<T> localize(final ILocalizerCache<T> localizer, ComputationStrategies strategy) { // == 1. Create Weka training instance final List<INode<T>> nodes = new ArrayList<>(localizer.getNodes()); // nominal true/false values final List<String> tf = new ArrayList<>(); tf.add("t");/*from w w w . j av a 2 s . c o m*/ tf.add("f"); // create an attribute for each component final Map<INode<T>, Attribute> attributeMap = new HashMap<>(); final ArrayList<Attribute> attributeList = new ArrayList<>(); // NOCS: Weka needs ArrayList.. for (final INode<T> node : nodes) { final Attribute attribute = new Attribute(node.toString(), tf); attributeList.add(attribute); attributeMap.put(node, attribute); } // create class attribute (trace success) final Attribute successAttribute = new Attribute("success", tf); attributeList.add(successAttribute); // create weka training instance final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1); trainingSet.setClassIndex(attributeList.size() - 1); // == 2. add traces to training set // add an instance for each trace for (final ITrace<T> trace : localizer.getTraces()) { final Instance instance = new DenseInstance(nodes.size() + 1); instance.setDataset(trainingSet); for (final INode<T> node : nodes) { instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f"); } instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f"); trainingSet.add(instance); } // == 3. 
use prediction to localize faults // build classifier try { final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions, trainingSet); final SBFLRanking<T> ranking = new SBFLRanking<>(); Log.out(this, "begin classifying"); int classified = 0; final Instance instance = new DenseInstance(nodes.size() + 1); instance.setDataset(trainingSet); for (final INode<T> node : nodes) { instance.setValue(attributeMap.get(node), "f"); } instance.setValue(successAttribute, "f"); for (final INode<T> node : nodes) { classified++; if (classified % 1000 == 0) { Log.out(this, String.format("Classified %d nodes.", classified)); } // contain only the current node in the network instance.setValue(attributeMap.get(node), "t"); // predict with which probability this setup leads to a failing network final double[] distribution = classifier.distributionForInstance(instance); ranking.add(node, distribution[1]); // reset involvment for node instance.setValue(attributeMap.get(node), "f"); } return ranking; } catch (final Exception e) { // NOCS: Weka throws only raw exceptions throw new RuntimeException(e); } }
From source file:se.de.hu_berlin.informatik.stardust.localizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
@Override public SBFLRanking<T> localize(final ISpectra<T> spectra) { // == 1. Create Weka training instance final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes()); // nominal true/false values final List<String> tf = new ArrayList<String>(); tf.add("t");// w w w. ja va2 s . c om tf.add("f"); // create an attribute for each component final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>(); final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList.. for (final INode<T> node : nodes) { final Attribute attribute = new Attribute(node.toString(), tf); attributeList.add(attribute); attributeMap.put(node, attribute); } // create class attribute (trace success) final Attribute successAttribute = new Attribute("success", tf); attributeList.add(successAttribute); // create weka training instance final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1); trainingSet.setClassIndex(attributeList.size() - 1); // == 2. add traces to training set // add an instance for each trace for (final ITrace<T> trace : spectra.getTraces()) { final Instance instance = new DenseInstance(nodes.size() + 1); instance.setDataset(trainingSet); for (final INode<T> node : nodes) { instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f"); } instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f"); trainingSet.add(instance); } // == 3. 
use prediction to localize faults // build classifier try { final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions, trainingSet); final SBFLRanking<T> ranking = new SBFLRanking<>(); Log.out(this, "begin classifying"); int classified = 0; final Instance instance = new DenseInstance(nodes.size() + 1); instance.setDataset(trainingSet); for (final INode<T> node : nodes) { instance.setValue(attributeMap.get(node), "f"); } instance.setValue(successAttribute, "f"); for (final INode<T> node : nodes) { classified++; if (classified % 1000 == 0) { Log.out(this, String.format("Classified %d nodes.", classified)); } // contain only the current node in the network instance.setValue(attributeMap.get(node), "t"); // predict with which probability this setup leads to a failing network final double[] distribution = classifier.distributionForInstance(instance); ranking.add(node, distribution[1]); // reset involvment for node instance.setValue(attributeMap.get(node), "f"); } return ranking; } catch (final Exception e) { // NOCS: Weka throws only raw exceptions throw new RuntimeException(e); } }
From source file:semana07.IrisKnn.java
public static void main(String[] args) throws FileNotFoundException, IOException, Exception { // DEFININDO CONJUNTO DE TREINAMENTO // - Definindo o leitor do arquivo arff FileReader baseIris = new FileReader("iris.arff"); // - Definindo o grupo de instancias a partir do arquivo "simpsons.arff" Instances iris = new Instances(baseIris); // - Definindo o indice do atributo classe iris.setClassIndex(4);//from w w w . j av a 2 s . co m iris = iris.resample(new Debug.Random()); Instances irisTreino = iris.trainCV(3, 0); Instances irisTeste = iris.testCV(3, 0); // DEFININDO EXEMPLO DESCONHECIDO //5.9,3.0,5.1,1.8,Iris-virginica Instance irisInst = new DenseInstance(iris.numAttributes()); irisInst.setDataset(iris); irisInst.setValue(0, 5.9); irisInst.setValue(1, 3.0); irisInst.setValue(2, 5.1); irisInst.setValue(3, 1.8); // DEFININDO ALGORITMO DE CLASSIFICAO //NN IBk vizinhoIris = new IBk(); //kNN IBk knnIris = new IBk(3); // MONTANDO CLASSIFICADOR //NN vizinhoIris.buildClassifier(irisTreino); //kNN knnIris.buildClassifier(irisTreino); // Definindo arquivo a ser escrito FileWriter writer = new FileWriter("iris.csv"); // Escrevendo o cabealho do arquivo writer.append("Classe Real;Resultado NN;Resultado kNN"); writer.append(System.lineSeparator()); // Sada CLI / Console System.out.println("Classe Real;Resultado NN;Resultado kNN"); //Cabealho for (int i = 0; i <= irisTeste.numInstances() - 1; i++) { Instance testeIris = irisTeste.instance(i); // Sada CLI / Console do valor original System.out.print(testeIris.stringValue(4) + ";"); // Escrevendo o valor original no arquivo writer.append(testeIris.stringValue(4) + ";"); // Definindo o atributo classe como indefinido testeIris.setClassMissing(); // CLASSIFICANDO A INSTANCIA // NN double respostaVizinho = vizinhoIris.classifyInstance(testeIris); testeIris.setValue(4, respostaVizinho); String stringVizinho = testeIris.stringValue(4); //kNN double respostaKnn = knnIris.classifyInstance(testeIris); // Atribuindo respota ao valor do 
atributo do index 4(classe) testeIris.setValue(4, respostaKnn); String stringKnn = testeIris.stringValue(4); // Adicionando resultado ao grupo de instancia iris iris.add(irisInst); //Escrevendo os resultados no arquivo iris.csv writer.append(stringVizinho + ";"); writer.append(stringKnn + ";"); writer.append(System.lineSeparator()); // Exibindo via CLI / Console o resultado System.out.print(respostaVizinho + ";"); System.out.print(respostaKnn + ";"); System.out.println(testeIris.stringValue(4)); } writer.flush(); writer.close(); }
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License:Open Source License
/**
 * Builds the binary training set for the classifier of one class value.
 *
 * <p>Each input instance is copied. A copy whose class value equals
 * {@code p_ClassIndex} gets class value 1. Any other copy gets class value 0,
 * unless {@code p_ID2Classes} lists {@code p_ClassIndex} for the instance's id,
 * in which case the instance is left out. The id attribute is removed from
 * every copy that is kept.
 *
 * @param p_Instances
 *            input instances
 * @param p_ClassIndex
 *            class value the generated data set is for
 * @param p_ID2Classes
 *            class values recorded per instance id; may be {@code null}
 * @return new instances in the output format
 */
protected Instances genInstances(Instances p_Instances, double p_ClassIndex,
        Hashtable<String, ArrayList<Double>> p_ID2Classes) {
    Instances result = new Instances(this.m_OutputFormat, 0);
    final int total = p_Instances.numInstances();
    for (int idx = 0; idx < total; idx++) {
        Instance source = p_Instances.instance(idx);

        // keep the sparse representation when the source uses it
        Instance copy = (source instanceof SparseInstance) ? new SparseInstance(source) : new Instance(source);

        boolean isTargetClass = copy.value(p_Instances.classIndex()) == p_ClassIndex;
        if (isTargetClass) {
            copy.setValue(source.classIndex(), 1);
        } else if (p_ID2Classes != null
                && p_ID2Classes.get(source.stringValue(this.m_IndexOfID)).contains(Double.valueOf(p_ClassIndex))) {
            // the id is also recorded with the target class: leave the instance out
            continue;
        } else {
            copy.setValue(source.classIndex(), 0);
        }

        copy.deleteAttributeAt(this.m_IndexOfID);
        copy.setDataset(result);
        result.add(copy);
    }
    return result;
}
From source file:sim.app.ubik.behaviors.sharedservices.EMClustering.java
License:Open Source License
/** * Obtener instancia de una persona en el cluster * @param ui/*from w w w . j av a 2s . c o m*/ * @return */ private Instance getInstance(UserInterface ui) { SharedService ss = slist.get(0); Instance inst = new Instance(ss.getConfigurations().length); for (int i = 0; i < ss.getConfigurations().length; i++) { String nameConf = ss.getConfigurations()[i]; int valueConf = ui.getNegotiation().getPreferences(ss).get(nameConf); inst.setValue((Attribute) attributes.elementAt(i), valueConf); } return inst; }
From source file:sim.app.ubik.behaviors.sharedservices.EMClustering.java
License:Open Source License
private Instance getInstance2(UserInterface ui) { SharedService ss = slist.get(0);// w ww . jav a 2s . c o m Instance inst = new Instance(ss.getConfigurations().length); vm.setCss(ss); ArrayList<MutableInt2D> votes = vm.getUserVotes(ui); for (int i = 0; i < ss.getConfigurations().length; i++) { inst.setValue((Attribute) attributes.elementAt(i), votes.get(i).y); } return inst; }
From source file:sim.app.ubik.behaviors.sharedservices.Preferences.java
License:Open Source License
/** * Mtodo para crear objeto Instance a partir de un hashmap de preferencias * @param preferences, obtenido de getPreferences, por ejemplo * @return Preferencias en hashmap// w w w . j ava2 s.co m */ public Instance getPreferencesInstance(SharedService ss) { HashMap<String, Integer> preferences = this.getPreferences(ss); ArrayList<Attribute> atts = new ArrayList<Attribute>(); for (String key : preferences.keySet()) { System.out.println("Creando atributo: " + key); atts.add(new Attribute(key)); } // Create empty instance Instance inst = new Instance(preferences.size() + 1); Attribute x = new Attribute("x"); // Set instance's values for the attributes for (int i = 0; i < preferences.size(); i++) { System.out.println("Obteniendo preferencia numero: " + i); inst.setValue(x, 0.0); } return inst; }
From source file:simulatorGUI.MassSpec.java
License:Open Source License
public boolean computeIsotopicEnvelopes(Peptide peptide, ArrayList<Modification> mods) { int chargeFloor = 0; int chargeCeil = 0; resetCounts(); // this allows us to avoid re-allocating the elements for each new sequence // set modifications boolean modMethionine = false; boolean modPhosphorylation = false; boolean modPyroglutamate = false; if (mods != null) { for (int i = 0; i < mods.size(); i++) { switch (mods.get(i).name) { case "methionine": modMethionine = true;/* ww w . j ava2s . c o m*/ break; case "phosphorylation": modPhosphorylation = true; break; case "pyroglutamate": modPyroglutamate = true; break; } } } // 1. Modify poly amino acids to standard, // 2. Get atom counts from the amino acid, // 3. Apply post translational modifications to atom counts. for (char rawAA : peptide.sequence.toCharArray()) { char aa = rawAA; // poly amino acids: "X" is for any (I exclude uncommon "U" and "O") if (aa == 'X') { // poly amino acids: "X" is for any (I exclude uncommon "U" and "O") aa = aasX.charAt(localRandomFactory.localRand.nextInt(aasX.length())); } else if (aa == 'B') { // poly amino acids: "B" is "N" or "D" aa = aasB.charAt(localRandomFactory.localRand.nextInt(aasB.length())); aaB += 1; } else if (aa == 'Z') { // poly amino acids: "Z" is "Q" or "E" aa = aasZ.charAt(localRandomFactory.localRand.nextInt(aasZ.length())); aaZ += 1; } switch (aa) { // standard amino acids: (modifications included beneath case) case 'A': //=> { :C =>3, :H =>5 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaA += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 3; elementH.count += 5; elementO.count += 1; elementN.count += 1; break; case 'C': // => { :C =>3, :H =>5 , :O =>1 , :N =>1 , :S =>1 , :P =>0, :Se =>0 }, aaC += 1; // charge calculation chargeC += 1; // base: elementC.count += 3; elementH.count += 5; elementO.count += 1; elementN.count += 1; elementS.count += 1; if (Modifications.CarbamidomethylationGain) { // PTM: carbamidomethylation, gain, static, C 2 H 3 
N 1 O 1 S 0 elementC.count += 2; elementH.count += 3; elementN.count += 1; elementO.count += 1; } break; case 'D': // => { :C =>4, :H =>5 , :O =>3 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaD += 1; // charge calculation chargeD += 1; elementC.count += 4; elementH.count += 5; elementO.count += 3; elementN.count += 1; break; case 'E': // => { :C =>5, :H =>7 , :O =>3 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaE += 1; // charge calculation chargeE += 1; elementC.count += 5; elementH.count += 7; elementO.count += 3; elementN.count += 1; break; case 'F': // => { :C =>9, :H =>9 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaF += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 9; elementH.count += 9; elementO.count += 1; elementN.count += 1; break; case 'G': //=> { :C =>2, :H =>3 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaG += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 2; elementH.count += 3; elementO.count += 1; elementN.count += 1; break; case 'I': // => { :C =>6, :H =>11 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaI += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 6; elementH.count += 11; elementO.count += 1; elementN.count += 1; break; case 'H': // => { :C =>6, :H =>7 , :O =>1 , :N =>3 , :S =>0 , :P =>0, :Se =>0 }, aaH += 1; //charge calculation chargeH += 1; elementC.count += 6; elementH.count += 7; elementO.count += 1; elementN.count += 3; break; case 'K': // => { :C =>6, :H =>12 , :O =>1 , :N =>2 , :S =>0 , :P =>0, :Se =>0 }, aaK += 1; // charge calculation chargeK += 1; elementC.count += 6; elementH.count += 12; elementO.count += 1; elementN.count += 2; break; case 'L': // => { :C =>6, :H =>11 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaL += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 6; elementH.count += 11; elementO.count += 1; elementN.count += 1; break; case 'M': // => { :C =>5, :H =>9 , :O =>1 , :N =>1 , :S =>1 , :P =>0, :Se =>0 }, aaM += 1; // charge 
calculation chargeHydrophobic += 1; // base: elementC.count += 5; elementH.count += 9; elementO.count += 1; elementN.count += 1; elementS.count += 1; // PTM: M -> oxidation of methionine, gain, variable, O 1 // variable if (modMethionine) { elementO.count += 1; } break; case 'N': // => { :C =>4, :H =>6 , :O =>2 , :N =>2 , :S =>0 , :P =>0, :Se =>0 }, aaN += 1; // charge calculation chargePolar += 1; elementC.count += 4; elementH.count += 6; elementO.count += 2; elementN.count += 2; break; case 'O': // => { :C =>12, :H =>19 , :O =>2 , :N =>3 , :S =>0 , :P =>0, :Se =>0 }, elementC.count += 12; elementH.count += 19; elementO.count += 2; elementN.count += 3; break; case 'P': // => { :C =>5, :H =>7 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaP += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 5; elementH.count += 7; elementO.count += 1; elementN.count += 1; break; case 'Q': // => { :C =>5, :H =>8 , :O =>2 , :N =>2 , :S =>0 , :P =>0, :Se =>0 }, aaQ += 1; // charge calculation chargePolar += 1; // base: elementC.count += 5; elementH.count += 8; elementO.count += 2; elementN.count += 2; if (modPyroglutamate) { // Q -> pyroglutamate (or pyroglutamic acid) loss, variable, N 1 H 3 elementN.count -= 1; elementH.count -= 3; } break; case 'R': // => { :C =>6, :H =>12 , :O =>1 , :N =>4 , :S =>0 , :P =>0, :Se =>0 }, aaR += 1; // charge calculation chargeR += 1; elementC.count += 6; elementH.count += 12; elementO.count += 1; elementN.count += 4; break; case 'S': // => { :C =>3, :H =>5 , :O =>2 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaS += 1; // charge calculation chargePolar += 1; // base: elementC.count += 3; elementH.count += 5; elementO.count += 2; elementN.count += 1; if (modPhosphorylation) { // S,T,Y -> phosphorylation, gain, H 1 O 3 P 1 elementH.count += 1; elementO.count += 3; elementP.count += 1; } break; case 'T': // => { :C =>4, :H =>7 , :O =>2 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaT += 1; // charge calculation chargePolar += 1; // base: 
elementC.count += 4; elementH.count += 7; elementO.count += 2; elementN.count += 1; if (modPhosphorylation) { // S,T,Y -> phosphorylation, gain, H 1 O 3 P 1 elementH.count += 1; elementO.count += 3; elementP.count += 1; } break; case 'U': // => { :C =>3, :H =>5 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>1 }, // charge calculation chargeU += 1; elementC.count += 3; elementH.count += 5; elementO.count += 1; elementN.count += 1; elementSe.count += 1; break; case 'V': // => { :C =>5, :H =>9 , :O =>1 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaV += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 5; elementH.count += 9; elementO.count += 1; elementN.count += 1; break; case 'W': // => { :C =>11, :H =>10 , :O =>1 , :N =>2 , :S =>0 , :P =>0, :Se =>0 }, aaW += 1; // charge calculation chargeHydrophobic += 1; elementC.count += 11; elementH.count += 10; elementO.count += 1; elementN.count += 2; break; case 'Y': // => { :C =>9, :H =>9 , :O =>2 , :N =>1 , :S =>0 , :P =>0, :Se =>0 }, aaY += 1; // charge calc chargeY += 1; // base: elementC.count += 9; elementH.count += 9; elementO.count += 2; elementN.count += 1; if (modPhosphorylation) { // S,T,Y -> phosphorylation, gain, H 1 O 3 P 1 elementH.count += 1; elementO.count += 3; elementP.count += 1; } break; default: JOptionPane.showMessageDialog(null, "Error: Amino acid in fasta not recognized.", "Error", JOptionPane.ERROR_MESSAGE); return false; } } // // Figure out charge // double preCharge = 0; preCharge += -1.0 / (1.0 + Math.pow(10.0, residueTable.get(peptide.sequence.charAt(0))[1] - Modifications.pH)); preCharge += -chargeD / (1.0 + Math.pow(10.0, 3.65 - Modifications.pH)); preCharge += -chargeE / (1.0 + Math.pow(10.0, 4.25 - Modifications.pH)); preCharge += -chargeC / (1.0 + Math.pow(10.0, 8.18 - Modifications.pH)); preCharge += -chargeY / (1.0 + Math.pow(10.0, 10.07 - Modifications.pH)); preCharge += 1.0 / (1.0 + Math.pow(10.0, Modifications.pH - 
residueTable.get(peptide.sequence.charAt(peptide.sequence.length() - 1))[0])); preCharge += chargeH / (1.0 + Math.pow(10.0, Modifications.pH - 6.00)); preCharge += chargeK / (1.0 + Math.pow(10.0, Modifications.pH - 10.53)); preCharge += chargeR / (1.0 + Math.pow(10.0, Modifications.pH - 12.48)); chargeFloor = (int) Math.floor(preCharge); chargeCeil = (int) Math.ceil(preCharge); int[] charges; if (chargeFloor == 0 || chargeFloor == chargeCeil) { charges = new int[1]; charges[0] = chargeCeil; } else { charges = new int[2]; charges[0] = chargeFloor; charges[1] = chargeCeil; } int origHCount = elementH.count; for (double charge : charges) { if (charge > 0) { // // Tweak H count based on charge // elementH.count = origHCount + (int) charge; // // compute the isotopic distribution (MZs and intensities) // // calculate the isotopic slice based on the atom counts // get the lowNominal and highNominal values int lowNominal = 0; int highNominal = 0; for (Element el : elements) { lowNominal += el.getLowMassNumber() * el.count; highNominal += el.getHighMassNumber() * el.count; } // get the fft of the vector of relative abundances for the isotopes of each element int nextPow2 = 1024; while (highNominal > nextPow2) { nextPow2 *= 2; } double[] relativeAbundancesReal = new double[nextPow2]; double[] relativeAbundancesImag = new double[nextPow2]; double[] fftAbundancesReal = new double[nextPow2]; double[] fftAbundancesImag = new double[nextPow2]; double monoMass = 0; boolean firstGo = true; for (Element el : elements) { // reset the arrays to compute the relative abundances double[][] relativeAbundances = el.getRelativeAbundanceFFT(nextPow2, fftBase); relativeAbundancesReal = relativeAbundances[0]; relativeAbundancesImag = relativeAbundances[1]; // convolve the frequencies of each element double prevReal = 0; for (int i = 0; i < relativeAbundancesReal.length; i++) { double[] power = complexPower(relativeAbundancesReal[i], relativeAbundancesImag[i], el.count); if (firstGo) { 
fftAbundancesReal[i] = power[0]; fftAbundancesImag[i] = power[1]; } else { prevReal = fftAbundancesReal[i]; fftAbundancesReal[i] = fftAbundancesReal[i] * power[0] - fftAbundancesImag[i] * power[1]; fftAbundancesImag[i] = prevReal * power[1] + fftAbundancesImag[i] * power[0]; } } firstGo = false; monoMass += el.count * el.getMonoIsotopicmass(); } fftBase.fft(fftAbundancesImag, fftAbundancesReal); // Inverse FFT double maxAbundance = 0; double totalAbundance = 0; int monoMZIndex = 0; for (int i = lowNominal; i < highNominal - 1; i++) { if (fftAbundancesReal[i] > 0) { totalAbundance += fftAbundancesReal[i]; if (fftAbundancesReal[i] > maxAbundance) { monoMZIndex = i; } } } double normalizedAbundance = 0; double newMass = 0; double lastMass = (monoMass + charge) / charge - NEUTRON_MASS / charge; ArrayList<Double> isotopeMasses = new ArrayList<Double>(10); ArrayList<Double> isotopeIntensities = new ArrayList<Double>(10); // we only want entries with index between lowNominal and highNominal for (int i = lowNominal; i < highNominal - 1; i++) { // normalize normalizedAbundance = fftAbundancesReal[i] / totalAbundance; newMass = lastMass + NEUTRON_MASS / charge; lastMass = newMass; // keep if above threshold if (normalizedAbundance > ABUNDANCE_THRESHOLD) { isotopeMasses.add(newMass); isotopeIntensities.add(normalizedAbundance); if (i == monoMZIndex) { monoMZ = newMass; } } } // make sure in mz range if (isotopeMasses.size() > 0 && (isotopeMasses.get(0) > minMZ || isotopeMasses.get(isotopeMasses.size() - 1) < maxMZ)) { // calculate RT: create weka instance and run on model to get RTs Instance rtInstance = new Instance(rtData.numAttributes()); rtInstance.setValue(rtAttA, aaA); rtInstance.setValue(rtAttR, aaR); rtInstance.setValue(rtAttN, aaN); rtInstance.setValue(rtAttD, aaD); rtInstance.setValue(rtAttB, aaB); rtInstance.setValue(rtAttC, aaC); rtInstance.setValue(rtAttE, aaE); rtInstance.setValue(rtAttQ, aaQ); rtInstance.setValue(rtAttZ, aaZ); rtInstance.setValue(rtAttG, aaG); 
rtInstance.setValue(rtAttH, aaH); rtInstance.setValue(rtAttI, aaI); rtInstance.setValue(rtAttL, aaL); rtInstance.setValue(rtAttK, aaK); rtInstance.setValue(rtAttM, aaM); rtInstance.setValue(rtAttF, aaF); rtInstance.setValue(rtAttP, aaP); rtInstance.setValue(rtAttS, aaS); rtInstance.setValue(rtAttT, aaT); rtInstance.setValue(rtAttW, aaW); rtInstance.setValue(rtAttY, aaY); rtInstance.setValue(rtAttV, aaV); rtInstance.setValue(rtAttJ, aaJ); rtData.add(rtInstance); double predictedRt = 0; try { predictedRt = rtCls.classifyInstance(rtInstance); //predictedRt = localRandomFactory.localRand.nextDouble() * MassSpec.runTime; } catch (Exception ex) { JOptionPane.showMessageDialog(null, "WEKA error 1", "Error", JOptionPane.ERROR_MESSAGE); return false; } // // Create isotopic envelope // if (predictedRt > 0) { // check this upfront so as not to waste time double secondsToScans = 250.0 * (double) samplingRate * 60.0; //Convert # seconds (250 max) to # scans int traceLength = (int) (localRandomFactory.localRand.nextDouble() * secondsToScans); // find first rtArray index greater than value (or rtArray.size-1 if at tail) int rtIndex = 0; int start = (int) (predictedRt * 10.0); if (predictedRt > rtArray[rtArray.length - 1]) { rtIndex = rtArray.length - 1; } else { rtIndex = rtArrayShifted[start]; } int rtFloor = Math.max(0, rtIndex - (int) (secondsToScans / 4.0)); int rtCeil = Math.min(rtArray.length - 1, rtIndex + (int) (secondsToScans / 2.0)); IsotopicEnvelope isotopicEnvelope = new IsotopicEnvelope(isotopeIntensities, isotopeMasses, peptide.abundance, predictedRt, traceLength, rtFloor, rtCeil, charge, peptide.peptideID, peptide.sequence, isotopeEnvelopeID, charges); isotopeEnvelopeID++; // Add this isotopicEnvelope to the list of all envelopes isotopicEnvelopesInstance.add(isotopicEnvelope); } } } } return true; }
From source file:sirius.predictor.main.PredictorFrame.java
License:Open Source License
private void runType3Classifier(ClassifierData classifierData) { /*//from w w w . java 2 s . c o m * This is for type3 classifier * Note that all position and motif list only does not apply to this classifier as * it will only give one score for each sequence */ if (sequenceNameTableModel.getRowCount() < 1) { JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence", JOptionPane.INFORMATION_MESSAGE); return; } if (loadFastaFileMenuItem.getState() == false) { JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!", "Wrong File Format", JOptionPane.INFORMATION_MESSAGE); return; } if (onAllPositionsMenuItem.getState() == false) { JOptionPane.showMessageDialog(this, "For type 3 classifier, it make only one prediction a sequence", "Information", JOptionPane.INFORMATION_MESSAGE); } try { BufferedWriter output = new BufferedWriter(new FileWriter( outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores")); Classifier classifierOne = classifierData.getClassifierOne(); //Reading and Storing the featureList Instances inst = classifierData.getInstances(); ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>(); for (int x = 0; x < inst.numAttributes() - 1; x++) { //-1 because class attribute must be ignored featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name())); } //Going through each and every sequence for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) { if (stopClassifier == true) { statusPane.setText("Running of Classifier Stopped!"); stopClassifier = false; output.close(); return; } //if(x%100 == 0) statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x + " / " + sequenceNameTableModel.getRowCount()); //Header output.write(sequenceNameTableModel.getHeader(x)); output.newLine(); 
output.write(sequenceNameTableModel.getSequence(x)); output.newLine(); //Sequence Score -> index-score, index-score String sequence = sequenceNameTableModel.getSequence(x); Instance tempInst; tempInst = new Instance(inst.numAttributes()); tempInst.setDataset(inst); for (int z = 0; z < inst.numAttributes() - 1; z++) { //-1 because class attribute can be ignored //Give the sequence and the featureList to get the feature freqs on the sequence Object obj = GenerateArff.getMatchCount("+1_Index(-1)", sequence, featureDataArrayList.get(z), classifierData.getScoringMatrixIndex(), classifierData.getCountingStyleIndex(), classifierData.getScoringMatrix()); if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer")) tempInst.setValue(z, (Integer) obj); else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double")) tempInst.setValue(z, (Double) obj); else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String")) tempInst.setValue(z, (String) obj); else { output.close(); throw new Error("Unknown: " + obj.getClass().getName()); } } //note that pos or neg does not matter as this is not used tempInst.setValue(inst.numAttributes() - 1, "pos"); try { double[] results = classifierOne.distributionForInstance(tempInst); output.write("0=" + results[0]); } catch (Exception e) { //this is to ensure that the run will continue output.write("0=-0.0"); //change throw error to screen output if i want the run to continue System.err .println("Exception has Occurred for classifierOne.distributionForInstance(tempInst);"); } output.newLine(); output.flush(); } output.flush(); output.close(); statusPane.setText("ClassifierOne finished running..."); loadScoreFile(outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } }
From source file:sirius.predictor.main.PredictorFrame.java
License:Open Source License
private void runClassifier(ClassifierData classifierData, boolean allPositions) { //this method is for type 1 classifier with all positions and motif list //and type 2 classifier with all positions if (sequenceNameTableModel.getRowCount() < 1) { JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence", JOptionPane.INFORMATION_MESSAGE); return;/*w w w . j a v a 2s . c o m*/ } if (loadFastaFileMenuItem.getState() == false) { JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!", "Wrong File Format", JOptionPane.INFORMATION_MESSAGE); return; } if (onAllPositionsMenuItem.getState() == false && motifListTableModel.getSize() == 0) { JOptionPane.showMessageDialog(this, "There are no Motifs chosen in Motif List!", "No Motifs", JOptionPane.INFORMATION_MESSAGE); MotifListDialog dialog = new MotifListDialog(motifListTableModel); dialog.setLocationRelativeTo(this); dialog.setVisible(true); return; } while (outputDirectory == null) { JOptionPane.showMessageDialog(this, "Please set output directory first!", "Output Directory not set", JOptionPane.INFORMATION_MESSAGE); setOutputDirectory(); //return; } try { BufferedWriter output = new BufferedWriter(new FileWriter( outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores")); Classifier classifierOne = classifierData.getClassifierOne(); int leftMostPosition = classifierData.getLeftMostPosition(); int rightMostPosition = classifierData.getRightMostPosition(); //Reading and Storing the featureList Instances inst = classifierData.getInstances(); ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>(); for (int x = 0; x < inst.numAttributes() - 1; x++) { //-1 because class attribute must be ignored featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name())); } for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) { if 
(stopClassifier == true) { statusPane.setText("Running of Classifier Stopped!"); stopClassifier = false; output.close(); return; } //if(x%100 == 0) statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x + " / " + sequenceNameTableModel.getRowCount()); //Header output.write(sequenceNameTableModel.getHeader(x)); output.newLine(); output.write(sequenceNameTableModel.getSequence(x)); output.newLine(); //Sequence Score -> index-score, index-score String sequence = sequenceNameTableModel.getSequence(x); int minSequenceLengthRequired; int targetLocationIndex; if (leftMostPosition < 0 && rightMostPosition > 0) {// -ve and +ve minSequenceLengthRequired = (leftMostPosition * -1) + rightMostPosition; targetLocationIndex = (leftMostPosition * -1); } else if (leftMostPosition < 0 && rightMostPosition < 0) {//-ve and -ve minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1; targetLocationIndex = (leftMostPosition * -1); } else {//+ve and +ve minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1; targetLocationIndex = (leftMostPosition * -1); } boolean firstEntryForClassifierOne = true; for (int y = 0; y + (minSequenceLengthRequired - 1) < sequence.length(); y++) { //Check if targetLocation match any motif in motif List if (allPositions == false && motifListTableModel .gotMotifMatch(sequence.substring(y + 0, y + targetLocationIndex)) == false) continue; String line2 = sequence.substring(y + 0, y + minSequenceLengthRequired); Instance tempInst; tempInst = new Instance(inst.numAttributes()); tempInst.setDataset(inst); for (int z = 0; z < inst.numAttributes() - 1; z++) { //-1 because class attribute can be ignored //Give the sequence and the featureList to get the feature freqs on the sequence Object obj = GenerateArff.getMatchCount("+1_Index(" + targetLocationIndex + ")", line2, featureDataArrayList.get(z), classifierData.getScoringMatrixIndex(), classifierData.getCountingStyleIndex(), 
classifierData.getScoringMatrix()); if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer")) tempInst.setValue(z, (Integer) obj); else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double")) tempInst.setValue(z, (Double) obj); else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String")) tempInst.setValue(z, (String) obj); else { output.close(); throw new Error("Unknown: " + obj.getClass().getName()); } } //note that pos or neg does not matter as this is not used tempInst.setValue(inst.numAttributes() - 1, "neg"); double[] results = classifierOne.distributionForInstance(tempInst); if (firstEntryForClassifierOne) firstEntryForClassifierOne = false; else output.write(","); output.write(y + targetLocationIndex + "=" + results[0]); } output.newLine(); output.flush(); } output.flush(); output.close(); statusPane.setText("ClassifierOne finished running..."); //Run classifier Two if it is type 2 if (classifierData.getClassifierType() == 2) { BufferedWriter output2 = new BufferedWriter(new FileWriter( outputDirectory + File.separator + "classifiertwo_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores")); BufferedReader input2 = new BufferedReader(new FileReader( outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores")); Classifier classifierTwo = classifierData.getClassifierTwo(); Instances inst2 = classifierData.getInstances2(); int setUpstream = classifierData.getSetUpstream(); int setDownstream = classifierData.getSetDownstream(); int minScoreWindowRequired; if (setUpstream < 0 && setDownstream < 0) {//-ve and -ve minScoreWindowRequired = setDownstream - setUpstream + 1; } else if (setUpstream < 0 && setDownstream > 0) {//-ve and +ve minScoreWindowRequired = (setUpstream * -1) + setDownstream; } else {//+ve and +ve minScoreWindowRequired = setDownstream - setUpstream 
+ 1; } String lineHeader; String lineSequence; int lineCounter2 = 0; while ((lineHeader = input2.readLine()) != null) { if (stopClassifier == true) { statusPane.setText("Running of Classifier Stopped!"); stopClassifier = false; output2.close(); input2.close(); return; } //if(lineCounter2%100 == 0) statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierTwo @ " + lineCounter2 + " / " + sequenceNameTableModel.getRowCount()); lineSequence = input2.readLine(); output2.write(lineHeader); output2.newLine(); output2.write(lineSequence); output2.newLine(); StringTokenizer locationScore = new StringTokenizer(input2.readLine(), ","); int totalTokens = locationScore.countTokens(); String[][] scores = new String[totalTokens][2]; int scoreIndex = 0; while (locationScore.hasMoreTokens()) { StringTokenizer locationScoreToken = new StringTokenizer(locationScore.nextToken(), "="); scores[scoreIndex][0] = locationScoreToken.nextToken();//location scores[scoreIndex][1] = locationScoreToken.nextToken();//score scoreIndex++; } int targetLocationIndex2; if (setUpstream == 0 || setDownstream == 0) { output2.close(); input2.close(); throw new Exception("setUpstream == 0 || setDownstream == 0"); } if (setUpstream < 0) { targetLocationIndex2 = Integer.parseInt(scores[0][0]) + (-setUpstream); } else {//setUpstream > 0 targetLocationIndex2 = Integer.parseInt(scores[0][0]); //first location } for (int x = 0; x + minScoreWindowRequired - 1 < totalTokens; x++) { //+1 is for the class index if (x != 0) output2.write(","); Instance tempInst2 = new Instance(minScoreWindowRequired + 1); tempInst2.setDataset(inst2); for (int y = 0; y < minScoreWindowRequired; y++) { tempInst2.setValue(y, Double.parseDouble(scores[x + y][1])); } tempInst2.setValue(tempInst2.numAttributes() - 1, "pos"); double[] results = classifierTwo.distributionForInstance(tempInst2); output2.write(targetLocationIndex2 + "=" + results[0]); targetLocationIndex2++; } lineCounter2++; output2.newLine(); } 
input2.close(); output2.close(); statusPane.setText("ClassifierTwo finished running..."); } if (classifierData.getClassifierType() == 1) loadScoreFile( outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"); else loadScoreFile( outputDirectory + File.separator + "classifiertwo_" + classifierData.getClassifierName() + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } }