List of usage examples for weka.core Instance setMissing
public void setMissing(Attribute att);
From source file:edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java
License:Open Source License
@Override public List<Action> schedule(Set<ModelQuery> toSched) { SingularMachineState start = new SingularMachineState(toSched, qtp, sla); List<Action> toR = new LinkedList<Action>(); applyLoop: while (!start.isGoalState()) { log.fine("Current state: " + start); SortedMap<String, String> features = start.getFeatures(); Instance toClassify = new Instance(attributes.length); toClassify.setDataset(wekaDataSet); for (Attribute a : attributes) { if (a.name().equals("action")) { //toClassify.setValue(a, "N"); continue; }//from w ww .ja va2 s . com try { if (features.get(a.name()).equals("?")) { toClassify.setMissing(a); continue; } try { double d = Double.valueOf(features.get(a.name())); toClassify.setValue(a, d); } catch (NumberFormatException e) { toClassify.setValue(a, features.get(a.name())); } } catch (IllegalArgumentException e) { e.printStackTrace(); log.warning( "Encountered previously unseen attribute value! Might need better training data... making random selection."); log.warning("Value for attribute " + a.name() + " was " + features.get(a.name())); Action rand = getPUAction(start); log.warning("Random action selected: " + rand); toR.add(rand); start.applyAction(rand); continue applyLoop; } } toClassify.setClassMissing(); log.finer("Going to classify: " + toClassify); try { double d = tree.classifyInstance(toClassify); toClassify.setClassValue(d); String action = toClassify.stringValue(toClassify.classIndex()); log.finer("Got action string: " + action); Action selected = null; for (Action a : start.getPossibleActions()) { if (actionMatches(a, action)) { selected = a; break; } } if (selected == null) { //log.warning("Could not find applicable action for string: " + action + " ... picking random action"); Action a = getPUAction(start); start.applyAction(a); toR.add(a); continue; } log.fine("Selected action: " + selected); start.applyAction(selected); toR.add(selected); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } return toR; }
From source file:elh.eus.absa.Features.java
License:Open Source License
/** * Function fills the attribute vectors for the instances existing in the corpus given. * Attribute vectors contain the features loaded by the creatFeatureSet() function. * /*from w w w. ja va 2 s.c om*/ * @param boolean save : whether the Instances file should be saved to an arff file or not. * @return Weka Instances object containing the attribute vectors filled with the features specified * in the parameter file. */ public Instances loadInstances(boolean save, String prefix) throws IOException { String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_" + prefix; HashMap<String, Opinion> trainExamples = corpus.getOpinions(); int trainExamplesNum = trainExamples.size(); int bowWin = 0; if (params.containsKey("window")) { bowWin = Integer.parseInt(params.getProperty("window")); savePath = savePath + "_w" + bowWin; } //Properties posProp = new Properties(); //eus.ixa.ixa.pipe.pos.Annotate postagger = new eus.ixa.ixa.pipe.pos.Annotate(posProp); if (params.containsKey("lemmaNgrams")) { Properties posProp = NLPpipelineWrapper.setPostaggerProperties(params.getProperty("pos-model"), params.getProperty("lemma-model"), corpus.getLang(), "bin", "false"); postagger = new eus.ixa.ixa.pipe.pos.Annotate(posProp); } //System.out.println("train examples: "+trainExamplesNum); //Create the Weka object for the training set Instances rsltdata = new Instances("train", atts, trainExamplesNum); // setting class attribute (last attribute in train data. //traindata.setClassIndex(traindata.numAttributes() - 1); System.err.println("Features: loadInstances() - featNum: " + this.featNum + " - trainset attrib num -> " + rsltdata.numAttributes() + " - "); System.out.println("Features: loadInstances() - featNum: " + this.featNum + " - trainset attrib num -> " + rsltdata.numAttributes() + " - "); int instId = 1; // fill the vectors for each training example for (String oId : trainExamples.keySet()) { //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId())); //value vector double[] values = new double[featNum]; // first element is the instanceId values[rsltdata.attribute("instanceId").index()] = instId; // string normalization (emoticons, twitter grammar,...) String opNormalized = corpus.getOpinionSentence(oId); // compute uppercase ratio before normalization (if needed) double upRatio = 0.0; if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes")) { String upper = opNormalized.replaceAll("[\\p{Ll}]", ""); upRatio = (double) upper.length() / (double) opNormalized.length(); values[rsltdata.attribute("upperCaseRation").index()] = upRatio; } // string normalization (emoticons, twitter grammar,...) if ((params.containsKey("wfngrams") || params.containsKey("lemmaNgrams")) && (!params.getProperty("normalization", "none").equalsIgnoreCase("noEmot"))) { opNormalized = normalize(opNormalized, params.getProperty("normalization", "none")); } //process the current instance with the NLP pipeline in order to get token and lemma|pos features KAFDocument nafinst = new KAFDocument("", ""); String nafname = trainExamples.get(oId).getsId().replace(':', '_'); String nafDir = params.getProperty("kafDir"); String nafPath = nafDir + File.separator + nafname + ".kaf"; //counter for opinion sentence token number. Used for computing relative values of the features int tokNum = 1; try { if (params.containsKey("lemmaNgrams")) //(lemmaNgrams != null) && (!lemmaNgrams.isEmpty())) { if (FileUtilsElh.checkFile(nafPath)) { nafinst = KAFDocument.createFromFile(new File(nafPath)); } else { nafinst = NLPpipelineWrapper.ixaPipesTokPos(opNormalized, corpus.getLang(), params.getProperty("pos-model"), params.getProperty("lemma-model"), postagger); Files.createDirectories(Paths.get(nafDir)); nafinst.save(nafPath); } tokNum = nafinst.getWFs().size(); //System.err.println("Features::loadInstances - postagging opinion sentence ("+oId+") - "+corpus.getOpinionSentence(oId)); } else { if (FileUtilsElh.checkFile(nafPath)) { nafinst = KAFDocument.createFromFile(new File(nafPath)); } else { nafinst = NLPpipelineWrapper.ixaPipesTok(opNormalized, corpus.getLang()); } tokNum = nafinst.getWFs().size(); //System.err.println("Features::loadInstances - tokenizing opinion sentence ("+oId+") - "+corpus.getOpinionSentence(oId)); } } catch (IOException | JDOMException e) { System.err.println("Features::loadInstances() - error when NLP processing the instance " + instId + "|" + oId + ") for filling the attribute vector"); e.printStackTrace(); System.exit(5); } LinkedList<String> ngrams = new LinkedList<String>(); int ngramDim; try { ngramDim = Integer.valueOf(params.getProperty("wfngrams")); } catch (Exception e) { ngramDim = 0; } boolean polNgrams = false; if (params.containsKey("polNgrams")) { polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes"); } List<WF> window = nafinst.getWFs(); Integer end = corpus.getOpinion(oId).getTo(); // apply window if window active (>0) and if the target is not null (to=0) if ((bowWin > 0) && (end > 0)) { Integer start = corpus.getOpinion(oId).getFrom(); Integer to = window.size(); Integer from = 0; end++; for (int i = 0; i < window.size(); i++) { WF wf = window.get(i); if ((wf.getOffset() == start) && (i >= bowWin)) { from = i - bowWin; } else if (wf.getOffset() >= end) { if (i + bowWin < window.size()) { to = i + bowWin; } break; } } window = window.subList(from, to); //System.out.println("startTgt: "+start+" - from: "+from+" | endTrgt:"+(end-1)+" - to:"+to); } //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+ // "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n"); List<String> windowWFIds = new ArrayList<String>(); // word form ngram related features for (WF wf : window) { windowWFIds.add(wf.getId()); String wfStr = wf.getForm(); if (params.containsKey("wfngrams") && ngramDim > 0) { if (!savePath.contains("_wf" + ngramDim)) { savePath = savePath + "_wf" + ngramDim; } //if the current word form is in the ngram list activate the feature in the vector if (ngrams.size() >= ngramDim) { ngrams.removeFirst(); } ngrams.add(wfStr); // add ngrams to the feature vector checkNgramFeatures(ngrams, values, "wf", 1, false); //toknum } // Clark cluster info corresponding to the current word form if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) { if (!savePath.contains("_cl")) { savePath = savePath + "_cl"; } values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++; } // Clark cluster info corresponding to the current word form if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) { if (!savePath.contains("_br")) { savePath = savePath + "_br"; } values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++; } // Clark cluster info corresponding to the current word form if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) { if (!savePath.contains("_w2v")) { savePath = savePath + "_w2v"; } values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++; } } //empty ngram list and add remaining ngrams to the feature list checkNgramFeatures(ngrams, values, "wf", 1, true); //toknum // PoS tagger related attributes: lemmas and pos tags if (params.containsKey("lemmaNgrams") || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { ngrams = new LinkedList<String>(); if (params.containsKey("lemmaNgrams") && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) { ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams")); } else { ngramDim = 3; } LinkedList<String> posNgrams = new LinkedList<String>(); int posNgramDim = 0; if (params.containsKey("pos")) { posNgramDim = Integer.valueOf(params.getProperty("pos")); } for (Term t : nafinst.getTermsFromWFs(windowWFIds)) { //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0")) if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { if (!savePath.contains("_l" + ngramDim)) { savePath = savePath + "_l" + ngramDim; } String lemma = t.getLemma(); if (ngrams.size() >= ngramDim) { ngrams.removeFirst(); } ngrams.add(lemma); // add ngrams to the feature vector for (int i = 0; i < ngrams.size(); i++) { String ng = featureFromArray(ngrams.subList(0, i + 1), "lemma"); //if the current lemma is in the ngram list activate the feature in the vector if (params.containsKey("lemmaNgrams") && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) { Attribute ngAtt = rsltdata.attribute(ng); if (ngAtt != null) { addNumericToFeatureVector(ng, values, 1); //tokNum } } ng = featureFromArray(ngrams.subList(0, i + 1), ""); if (params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { checkPolarityLexicons(ng, values, tokNum, polNgrams); } //end polarity ngram checker } //end ngram checking } //pos tags if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) { if (!savePath.contains("_p")) { savePath = savePath + "_p"; } if (posNgrams.size() >= posNgramDim) { posNgrams.removeFirst(); } posNgrams.add(t.getPos()); // add ngrams to the feature vector checkNgramFeatures(posNgrams, values, "pos", 1, false); } } //endFor //empty ngram list and add remaining ngrams to the feature list while (!ngrams.isEmpty()) { String ng = featureFromArray(ngrams, "lemma"); //if the current lemma is in the ngram list activate the feature in the vector if (rsltdata.attribute(ng) != null) { addNumericToFeatureVector(ng, values, 1); //tokNum } // polarity lexicons if (params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { checkPolarityLexicons(ng, values, tokNum, polNgrams); } //end polarity ngram checker ngrams.removeFirst(); } //empty pos ngram list and add remaining pos ngrams to the feature list checkNgramFeatures(posNgrams, values, "pos", 1, true); } // add sentence length as a feature if (params.containsKey("sentenceLength") && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) { values[rsltdata.attribute("sentenceLength").index()] = tokNum; } //create object for the current instance and associate it with the current train dataset. Instance inst = new SparseInstance(1.0, values); inst.setDataset(rsltdata); // add category attributte values String cat = trainExamples.get(oId).getCategory(); if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) { if (cat.compareTo("NULL") == 0) { inst.setValue(rsltdata.attribute("entCat").index(), cat); inst.setValue(rsltdata.attribute("attCat").index(), cat); } else { String[] splitCat = cat.split("#"); inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]); inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]); } //inst.setValue(attIndexes.get("entAttCat"), cat); } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) { inst.setValue(rsltdata.attribute("entAttCat").index(), cat); } if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) { // add class value as a double (Weka stores all values as doubles ) String pol = normalizePolarity(trainExamples.get(oId).getPolarity()); //System.err.println("Features::loadInstances - pol "+pol+" for oid "+oId+" - text:"+corpus.getOpinionSentence(oId)); if (pol != null && !pol.isEmpty()) { //System.err.println("polarity: _"+pol+"_"); inst.setValue(rsltdata.attribute("polarityCat"), pol); } else { inst.setMissing(rsltdata.attribute("polarityCat")); } } //add instance to train data rsltdata.add(inst); //store opinion Id and instance Id this.opInst.put(oId, instId); instId++; } System.err.println("Features : loadInstances() - training data ready total number of examples -> " + trainExamplesNum + " - " + rsltdata.numInstances()); if (save) { try { savePath = savePath + ".arff"; System.err.println("arff written to: " + savePath); ArffSaver saver = new ArffSaver(); saver.setInstances(rsltdata); saver.setFile(new File(savePath)); saver.writeBatch(); } catch (IOException e1) { e1.printStackTrace(); } catch (Exception e2) { e2.printStackTrace(); } } return rsltdata; }
From source file:elh.eus.absa.Features.java
License:Open Source License
/** * Function fills the attribute vectors for the instances existing in the Conll tabulated formatted corpus given. * Attribute vectors contain the features loaded by the creatFeatureSet() function. * /* w w w. ja v a 2 s. com*/ * @param boolean save : whether the Instances file should be saved to an arff file or not. * @return Weka Instances object containing the attribute vectors filled with the features specified * in the parameter file. */ public Instances loadInstancesTAB(boolean save, String prefix) { String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_" + prefix; HashMap<String, Opinion> trainExamples = corpus.getOpinions(); int trainExamplesNum = trainExamples.size(); int bowWin = 0; if (params.containsKey("window")) { bowWin = Integer.parseInt(params.getProperty("window")); savePath = savePath + "_w" + bowWin; } //System.out.println("train examples: "+trainExamplesNum); //Create the Weka object for the training set Instances rsltdata = new Instances("train", atts, trainExamplesNum); // setting class attribute (last attribute in train data. //traindata.setClassIndex(traindata.numAttributes() - 1); System.err.println("Features: loadInstancesTAB() - featNum: " + this.featNum + " - trainset attrib num -> " + rsltdata.numAttributes() + " - "); System.out.println("Features: loadInstancesTAB() - featNum: " + this.featNum + " - trainset attrib num -> " + rsltdata.numAttributes() + " - "); int instId = 1; // fill the vectors for each training example for (String oId : trainExamples.keySet()) { //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId())); //value vector double[] values = new double[featNum]; // first element is the instanceId values[rsltdata.attribute("instanceId").index()] = instId; LinkedList<String> ngrams = new LinkedList<String>(); int ngramDim; try { ngramDim = Integer.valueOf(params.getProperty("wfngrams")); } catch (Exception e) { ngramDim = 0; } boolean polNgrams = false; if (params.containsKey("polNgrams")) { polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes"); } String[] noWindow = corpus.getOpinionSentence(oId).split("\n"); //counter for opinion sentence token number. Used for computing relative values of the features int tokNum = noWindow.length; List<String> window = Arrays.asList(noWindow); Integer end = corpus.getOpinion(oId).getTo(); // apply window if window active (>0) and if the target is not null (to=0) if ((bowWin > 0) && (end > 0)) { Integer start = corpus.getOpinion(oId).getFrom(); Integer from = start - bowWin; if (from < 0) { from = 0; } Integer to = end + bowWin; if (to > noWindow.length - 1) { to = noWindow.length - 1; } window = Arrays.asList(Arrays.copyOfRange(noWindow, from, to)); } //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+ // "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n"); //System.err.println(Arrays.toString(window.toArray())); // word form ngram related features for (String wf : window) { String[] fields = wf.split("\t"); String wfStr = normalize(fields[0], params.getProperty("normalization", "none")); // blank line means we found a sentence end. Empty n-gram list and reiniciate. if (wf.equals("")) { // add ngrams to the feature vector checkNgramFeatures(ngrams, values, "", 1, true); //toknum // since wf is empty no need to check for clusters and other features. continue; } if (params.containsKey("wfngrams") && ngramDim > 0) { if (!savePath.contains("_wf" + ngramDim)) { savePath = savePath + "_wf" + ngramDim; } //if the current word form is in the ngram list activate the feature in the vector if (ngrams.size() >= ngramDim) { ngrams.removeFirst(); } ngrams.add(wfStr); // add ngrams to the feature vector checkNgramFeatures(ngrams, values, "", 1, false); //toknum } // Clark cluster info corresponding to the current word form if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) { if (!savePath.contains("_cl")) { savePath = savePath + "_cl"; } values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++; } // Clark cluster info corresponding to the current word form if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) { if (!savePath.contains("_br")) { savePath = savePath + "_br"; } values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++; } // Clark cluster info corresponding to the current word form if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) { if (!savePath.contains("_w2v")) { savePath = savePath + "_w2v"; } values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++; } } //empty ngram list and add remaining ngrams to the feature list checkNgramFeatures(ngrams, values, "", 1, true); //toknum // PoS tagger related attributes: lemmas and pos tags if (params.containsKey("lemmaNgrams") || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { ngrams = new LinkedList<String>(); if (params.containsKey("lemmaNgrams") && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) { ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams")); } else { ngramDim = 3; } LinkedList<String> posNgrams = new LinkedList<String>(); int posNgramDim = 0; if (params.containsKey("pos")) { posNgramDim = Integer.valueOf(params.getProperty("pos")); } for (String t : window) { //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0")) if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { if (!savePath.contains("_l" + ngramDim)) { savePath = savePath + "_l" + ngramDim; } //blank line means we found a sentence end. Empty n-gram list and reiniciate. if (t.equals("")) { // check both lemma n-grams and polarity lexicons, and add values to the feature vector checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, true, polNgrams); //toknum // since t is empty no need to check for clusters and other features. continue; } String[] fields = t.split("\t"); if (fields.length < 2) { continue; } String lemma = normalize(fields[1], params.getProperty("normalization", "none")); if (ngrams.size() >= ngramDim) { ngrams.removeFirst(); } ngrams.add(lemma); // check both lemma n-grams and polarity lexicons, and add values to the feature vector checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, false, polNgrams); } //pos tags if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) { if (!savePath.contains("_p")) { savePath = savePath + "_p"; } if (posNgrams.size() >= posNgramDim) { posNgrams.removeFirst(); } String[] fields = t.split("\t"); if (fields.length < 3) { continue; } String pos = fields[2]; posNgrams.add(pos); // add ngrams to the feature vector checkNgramFeatures(posNgrams, values, "pos", 1, false); } } //endFor //empty ngram list and add remaining ngrams to the feature list // check both lemma n-grams and polarity lexicons, and add values to the feature vector checkNgramsAndPolarLexicons(ngrams, values, "", 1, tokNum, true, polNgrams); //empty pos ngram list and add remaining pos ngrams to the feature list checkNgramFeatures(posNgrams, values, "pos", 1, true); } // add sentence length as a feature if (params.containsKey("sentenceLength") && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) { values[rsltdata.attribute("sentenceLength").index()] = tokNum; } // compute uppercase ratio before normalization (if needed) //double upRatio =0.0; //if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes")) //{ // String upper = opNormalized.replaceAll("[a-z]", ""); // upRatio = (double)upper.length() / (double)opNormalized.length(); // values[rsltdata.attribute("upperCaseRation").index()] = upRatio; //} //create object for the current instance and associate it with the current train dataset. Instance inst = new SparseInstance(1.0, values); inst.setDataset(rsltdata); // add category attributte values String cat = trainExamples.get(oId).getCategory(); if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) { if (cat.compareTo("NULL") == 0) { inst.setValue(rsltdata.attribute("entCat").index(), cat); inst.setValue(rsltdata.attribute("attCat").index(), cat); } else { String[] splitCat = cat.split("#"); inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]); inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]); } //inst.setValue(attIndexes.get("entAttCat"), cat); } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) { inst.setValue(rsltdata.attribute("entAttCat").index(), cat); } if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) { // add class value as a double (Weka stores all values as doubles ) String pol = normalizePolarity(trainExamples.get(oId).getPolarity()); if (pol != null && !pol.isEmpty()) { inst.setValue(rsltdata.attribute("polarityCat"), pol); } else { //System.err.println("polarity: _"+pol+"_"); inst.setMissing(rsltdata.attribute("polarityCat")); } } //add instance to train data rsltdata.add(inst); //store opinion Id and instance Id this.opInst.put(oId, instId); instId++; } System.err.println("Features : loadInstancesTAB() - training data ready total number of examples -> " + trainExamplesNum + " - " + rsltdata.numInstances()); if (save) { try { savePath = savePath + ".arff"; System.err.println("arff written to: " + savePath); ArffSaver saver = new ArffSaver(); saver.setInstances(rsltdata); saver.setFile(new File(savePath)); saver.writeBatch(); } catch (IOException e1) { e1.printStackTrace(); } catch (Exception e2) { e2.printStackTrace(); } } return rsltdata; }
From source file:elh.eus.absa.Features.java
License:Open Source License
/** * Function fills the attribute vectors for the instances existing in the Conll tabulated formatted corpus given. * Attribute vectors contain the features loaded by the creatFeatureSet() function. * /*from ww w . j a va 2 s . c om*/ * @param boolean save : whether the Instances file should be saved to an arff file or not. * @return Weka Instances object containing the attribute vectors filled with the features specified * in the parameter file. */ public Instances loadInstancesConll(boolean save, String prefix) { String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_" + prefix; HashMap<String, Opinion> trainExamples = corpus.getOpinions(); String nafdir = params.getProperty("kafDir"); int trainExamplesNum = trainExamples.size(); int bowWin = 0; if (params.containsKey("window")) { bowWin = Integer.parseInt(params.getProperty("window")); savePath = savePath + "_w" + bowWin; } //System.out.println("train examples: "+trainExamplesNum); //Create the Weka object for the training set Instances rsltdata = new Instances("train", atts, trainExamplesNum); // setting class attribute (last attribute in train data. //traindata.setClassIndex(traindata.numAttributes() - 1); System.err.println("Features: loadInstancesConll() - featNum: " + this.featNum + " - trainset attrib num -> " + rsltdata.numAttributes() + " - "); System.out.println("Features: loadInstancesConll() - featNum: " + this.featNum + " - trainset attrib num -> " + rsltdata.numAttributes() + " - "); int instId = 1; // fill the vectors for each training example for (String oId : trainExamples.keySet()) { //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId())); //value vector double[] values = new double[featNum]; // first element is the instanceId values[rsltdata.attribute("instanceId").index()] = instId; LinkedList<String> ngrams = new LinkedList<String>(); int ngramDim; try { ngramDim = Integer.valueOf(params.getProperty("wfngrams")); } catch (Exception e) { ngramDim = 0; } boolean polNgrams = false; if (params.containsKey("polNgrams")) { polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes"); } String nafPath = nafdir + File.separator + trainExamples.get(oId).getsId().replace(':', '_'); String taggedFile = ""; try { if (!FileUtilsElh.checkFile(nafPath + ".kaf")) { nafPath = NLPpipelineWrapper.tagSentence(corpus.getOpinionSentence(oId), nafPath, corpus.getLang(), params.getProperty("pos-model"), params.getProperty("lemma-model"), postagger); } else { nafPath = nafPath + ".kaf"; } InputStream reader = new FileInputStream(new File(nafPath)); taggedFile = IOUtils.toString(reader); reader.close(); } catch (IOException | JDOMException fe) { // TODO Auto-generated catch block fe.printStackTrace(); } String[] noWindow = taggedFile.split("\n"); //counter for opinion sentence token number. Used for computing relative values of the features int tokNum = noWindow.length; //System.err.println("Features::loadInstancesConll - tagged File read lines:"+tokNum); List<String> window = Arrays.asList(noWindow); Integer end = corpus.getOpinion(oId).getTo(); // apply window if window active (>0) and if the target is not null (to=0) if ((bowWin > 0) && (end > 0)) { Integer start = corpus.getOpinion(oId).getFrom(); Integer from = start - bowWin; if (from < 0) { from = 0; } Integer to = end + bowWin; if (to > noWindow.length - 1) { to = noWindow.length - 1; } window = Arrays.asList(Arrays.copyOfRange(noWindow, from, to)); } //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+ // "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n"); //System.err.println(Arrays.toString(window.toArray())); // word form ngram related features for (String wf : window) { String[] fields = wf.split("\\s"); String wfStr = normalize(fields[0], params.getProperty("normalization", "none")); // blank line means we found a sentence end. Empty n-gram list and reiniciate. if (wf.equals("")) { // add ngrams to the feature vector checkNgramFeatures(ngrams, values, "", 1, true); //toknum // since wf is empty no need to check for clusters and other features. continue; } if (params.containsKey("wfngrams") && ngramDim > 0) { if (!savePath.contains("_wf" + ngramDim)) { savePath = savePath + "_wf" + ngramDim; } //if the current word form is in the ngram list activate the feature in the vector if (ngrams.size() >= ngramDim) { ngrams.removeFirst(); } ngrams.add(wfStr); // add ngrams to the feature vector checkNgramFeatures(ngrams, values, "", 1, false); //toknum } // Clark cluster info corresponding to the current word form if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) { if (!savePath.contains("_cl")) { savePath = savePath + "_cl"; } values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++; } // Clark cluster info corresponding to the current word form if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) { if (!savePath.contains("_br")) { savePath = savePath + "_br"; } values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++; } // Clark cluster info corresponding to the current word form if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) { if (!savePath.contains("_w2v")) { savePath = savePath + "_w2v"; } values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++; } } //empty ngram list and add remaining ngrams to the feature list checkNgramFeatures(ngrams, values, "", 1, true); //toknum // PoS tagger related attributes: lemmas and pos tags if (params.containsKey("lemmaNgrams") || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { ngrams = new LinkedList<String>(); if (params.containsKey("lemmaNgrams") && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) { ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams")); } else { ngramDim = 3; } LinkedList<String> posNgrams = new LinkedList<String>(); int posNgramDim = 0; if (params.containsKey("pos")) { posNgramDim = Integer.valueOf(params.getProperty("pos")); } for (String t : window) { //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0")) if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) { if (!savePath.contains("_l" + ngramDim)) { savePath = savePath + "_l" + ngramDim; } //blank line means we found a sentence end. Empty n-gram list and reiniciate. if (t.equals("")) { // check both lemma n-grams and polarity lexicons, and add values to the feature vector checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, true, polNgrams); //toknum // since t is empty no need to check for clusters and other features. continue; } String[] fields = t.split("\\s"); if (fields.length < 2) { continue; } String lemma = normalize(fields[1], params.getProperty("normalization", "none")); if (ngrams.size() >= ngramDim) { ngrams.removeFirst(); } ngrams.add(lemma); // check both lemma n-grams and polarity lexicons, and add values to the feature vector checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, false, polNgrams); } //pos tags if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) { if (!savePath.contains("_p")) { savePath = savePath + "_p"; } if (posNgrams.size() >= posNgramDim) { posNgrams.removeFirst(); } String[] fields = t.split("\\s"); if (fields.length < 3) { continue; } String pos = fields[2]; posNgrams.add(pos); // add ngrams to the feature vector checkNgramFeatures(posNgrams, values, "pos", 1, false); } } //endFor //empty ngram list and add remaining ngrams to the feature list // check both lemma n-grams and polarity lexicons, and add values to the feature vector checkNgramsAndPolarLexicons(ngrams, values, "", 1, tokNum, true, polNgrams); //empty pos ngram list and add remaining pos ngrams to the feature list checkNgramFeatures(posNgrams, values, "pos", 1, true); } // add sentence length as a feature if (params.containsKey("sentenceLength") && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) { values[rsltdata.attribute("sentenceLength").index()] = tokNum; } // compute uppercase ratio before normalization (if needed) //double upRatio =0.0; //if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes")) //{ // String upper = opNormalized.replaceAll("[a-z]", ""); // upRatio = (double)upper.length() / (double)opNormalized.length(); // values[rsltdata.attribute("upperCaseRation").index()] = upRatio; //} //create object for the current instance and associate it with the current train dataset. Instance inst = new SparseInstance(1.0, values); inst.setDataset(rsltdata); // add category attributte values String cat = trainExamples.get(oId).getCategory(); if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) { if (cat.compareTo("NULL") == 0) { inst.setValue(rsltdata.attribute("entCat").index(), cat); inst.setValue(rsltdata.attribute("attCat").index(), cat); } else { String[] splitCat = cat.split("#"); inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]); inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]); } //inst.setValue(attIndexes.get("entAttCat"), cat); } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) { inst.setValue(rsltdata.attribute("entAttCat").index(), cat); } if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) { // add class value as a double (Weka stores all values as doubles ) String pol = normalizePolarity(trainExamples.get(oId).getPolarity()); if (pol != null && !pol.isEmpty()) { inst.setValue(rsltdata.attribute("polarityCat"), pol); } else { //System.err.println("polarity: _"+pol+"_"); inst.setMissing(rsltdata.attribute("polarityCat")); } } //add instance to train data rsltdata.add(inst); //store opinion Id and instance Id this.opInst.put(oId, instId); instId++; } System.err.println("Features : loadInstancesConll() - training data ready total number of examples -> " + trainExamplesNum + " - " + rsltdata.numInstances()); if (save) { try { savePath = savePath + ".arff"; System.err.println("arff written to: " + savePath); ArffSaver saver = new ArffSaver(); saver.setInstances(rsltdata); saver.setFile(new File(savePath)); saver.writeBatch(); } catch (IOException e1) { e1.printStackTrace(); } catch (Exception e2) { e2.printStackTrace(); } } return rsltdata; }
From source file:gnusmail.filters.SingleAttFilter.java
License:Open Source License
@Override public void updateInstance(Instance inst, Document document) { try {/*from w w w . j a va 2 s . c o m*/ int index = attribute.indexOfValue(getSingleValue(document)); inst.setValue(attribute, index); } catch (MessagingException e) { inst.setMissing(attribute); } }
From source file:gnusmail.filters.SingleNumericAttFilter.java
License:Open Source License
@Override public void updateInstance(Instance inst, Document doc) { try {//from ww w . j ava2 s . c o m inst.setValue(attribute, getSingleValue(doc)); } catch (MessagingException e) { inst.setMissing(attribute); } }
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
@SuppressWarnings("unchecked") public static Instances createDataset(final Instances header, final Collection<Document> documents) throws Exception { // Update header to include all docIDs from the passed in documents // (Weka requires all values for nominal features) final Set<String> docIds = new TreeSet<String>(); for (Document document : documents) { docIds.add(document.getName());/* w ww .j a v a 2s . co m*/ } final AddValues avf = new AddValues(); avf.setLabels(StringUtils.join(docIds, ",")); // Have to add 1 because SingleIndex.setValue() has a bug, expecting // the passed in index to be 1-based rather than 0-based. Why? I have // no idea. // Calling path: AddValues.setInputFormat() --> // SingleIndex.setUpper() --> // SingleIndex.setValue() avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1)); avf.setInputFormat(header); final Instances newHeader = Filter.useFilter(header, avf); final Instances instances = new Instances(newHeader, documents.size()); // Map attributes final Map<String, Attribute> attrMap = new HashMap<String, Attribute>(); final Enumeration<Attribute> en = newHeader.enumerateAttributes(); while (en.hasMoreElements()) { final Attribute attr = en.nextElement(); attrMap.put(attr.name(), attr); } attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute()); final Attribute docId = attrMap.get(DOC_ID); final Attribute lineId = attrMap.get(LINE_ID); final Attribute classAttr = attrMap.get(LABEL); // Add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attrMap.size()); final Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); if (line.getLabel() == null) { instance.setMissing(classAttr); } else { instance.setValue(classAttr, line.getLabel()); } for (Attribute attribute : attrMap.values()) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attrMap.size() - 1); return instances; }
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
/** * Converts an input space point to a Weka instance. * @param point/* w w w . j a v a2 s .c om*/ * @return */ public static Instance convertPointToInstance(InputSpacePoint point, OutputSpacePoint outputPoint) { Instance inst = new Instance(point.numberDimensions() + outputPoint.numberDimensions()); int index = 0; for (String k : point.getKeysAsCollection()) { Attribute att = new Attribute(k, index++); inst.setValue(att, point.getValue(k)); } for (Entry<String, Double> e : outputPoint.getOutputPoints().entrySet()) { if (e.getValue() == null) { inst.setMissing(index++); } else { Attribute att = new Attribute(e.getKey(), index++); inst.setValue(att, e.getValue()); } } //assign instance to dataset FastVector att = new FastVector(point.numberDimensions() + 1); for (String s : point.getKeysAsCollection()) att.addElement(new Attribute(s, index++)); for (String k : outputPoint.getOutputPoints().keySet()) { att.addElement(new Attribute(k, index++)); } Instances dataset = new Instances("instances", att, point.numberDimensions() + 1); dataset.setClassIndex(dataset.numAttributes() - 1); inst.setDataset(dataset); return inst; }
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
public static Instance convertPointToInstance(InputSpacePoint point) { Instance inst = new Instance(point.numberDimensions() + 1); int index = 0; for (String k : point.getKeysAsCollection()) { Attribute att = new Attribute(k, index++); inst.setValue(att, point.getValue(k)); }//from w w w . j a va 2s .c o m inst.setMissing(index); //assign instance to dataset FastVector att = new FastVector(point.numberDimensions() + 1); for (String s : point.getKeysAsCollection()) att.addElement(new Attribute(s, index++)); att.addElement(new Attribute("objective", index++)); Instances dataset = new Instances("instances", att, point.numberDimensions() + 1); dataset.setClassIndex(dataset.numAttributes() - 1); inst.setDataset(dataset); return inst; }
From source file:matres.MatResUI.java
private void doClassification() { J48 m_treeResiko;//www . j a v a 2 s . com J48 m_treeAksi; NaiveBayes m_nbResiko; NaiveBayes m_nbAksi; FastVector m_fvInstanceRisks; FastVector m_fvInstanceActions; InputStream isRiskTree = getClass().getResourceAsStream("data/ResikoTree.model"); InputStream isRiskNB = getClass().getResourceAsStream("data/ResikoNB.model"); InputStream isActionTree = getClass().getResourceAsStream("data/AksiTree.model"); InputStream isActionNB = getClass().getResourceAsStream("data/AksiNB.model"); m_treeResiko = new J48(); m_treeAksi = new J48(); m_nbResiko = new NaiveBayes(); m_nbAksi = new NaiveBayes(); try { //m_treeResiko = (J48) weka.core.SerializationHelper.read("ResikoTree.model"); m_treeResiko = (J48) weka.core.SerializationHelper.read(isRiskTree); //m_nbResiko = (NaiveBayes) weka.core.SerializationHelper.read("ResikoNB.model"); m_nbResiko = (NaiveBayes) weka.core.SerializationHelper.read(isRiskNB); //m_treeAksi = (J48) weka.core.SerializationHelper.read("AksiTree.model"); m_treeAksi = (J48) weka.core.SerializationHelper.read(isActionTree); //m_nbAksi = (NaiveBayes) weka.core.SerializationHelper.read("AksiNB.model"); m_nbAksi = (NaiveBayes) weka.core.SerializationHelper.read(isActionNB); } catch (Exception ex) { Logger.getLogger(MatResUI.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("Setting up an Instance..."); // Values for LIKELIHOOD OF OCCURRENCE FastVector fvLO = new FastVector(5); fvLO.addElement("> 10 in 1 year"); fvLO.addElement("1 - 10 in 1 year"); fvLO.addElement("1 in 1 year to 1 in 10 years"); fvLO.addElement("1 in 10 years to 1 in 100 years"); fvLO.addElement("1 in more than 100 years"); // Values for SAFETY FastVector fvSafety = new FastVector(5); fvSafety.addElement("near miss"); fvSafety.addElement("first aid injury, medical aid injury"); fvSafety.addElement("lost time injury / temporary disability"); fvSafety.addElement("permanent disability"); fvSafety.addElement("fatality"); // Values for EXTRA FUEL COST FastVector fvEFC = new FastVector(5); fvEFC.addElement("< 100 million rupiah"); fvEFC.addElement("0,1 - 1 billion rupiah"); fvEFC.addElement("1 - 10 billion rupiah"); fvEFC.addElement("10 - 100 billion rupiah"); fvEFC.addElement("> 100 billion rupiah"); // Values for SYSTEM RELIABILITY FastVector fvSR = new FastVector(5); fvSR.addElement("< 100 MWh"); fvSR.addElement("0,1 - 1 GWh"); fvSR.addElement("1 - 10 GWh"); fvSR.addElement("10 - 100 GWh"); fvSR.addElement("> 100 GWh"); // Values for EQUIPMENT COST FastVector fvEC = new FastVector(5); fvEC.addElement("< 50 million rupiah"); fvEC.addElement("50 - 500 million rupiah"); fvEC.addElement("0,5 - 5 billion rupiah"); fvEC.addElement("5 -50 billion rupiah"); fvEC.addElement("> 50 billion rupiah"); // Values for CUSTOMER SATISFACTION SOCIAL FACTOR FastVector fvCSSF = new FastVector(5); fvCSSF.addElement("Complaint from the VIP customer"); fvCSSF.addElement("Complaint from industrial customer"); fvCSSF.addElement("Complaint from community"); fvCSSF.addElement("Complaint from community that have potential riot"); fvCSSF.addElement("High potential riot"); // Values for RISK FastVector fvRisk = new FastVector(4); fvRisk.addElement("Low"); fvRisk.addElement("Moderate"); fvRisk.addElement("High"); fvRisk.addElement("Extreme"); // Values for ACTION FastVector fvAction = new FastVector(3); fvAction.addElement("Life Extension Program"); fvAction.addElement("Repair/Refurbish"); fvAction.addElement("Replace/Run to Fail + Investment"); // Defining Attributes, including Class(es) Attributes Attribute attrLO = new Attribute("LO", fvLO); Attribute attrSafety = new Attribute("Safety", fvSafety); Attribute attrEFC = new Attribute("EFC", fvEFC); Attribute attrSR = new Attribute("SR", fvSR); Attribute attrEC = new Attribute("EC", fvEC); Attribute attrCSSF = new Attribute("CSSF", fvCSSF); Attribute attrRisk = new Attribute("Risk", fvRisk); Attribute attrAction = new Attribute("Action", fvAction); m_fvInstanceRisks = new FastVector(7); m_fvInstanceRisks.addElement(attrLO); m_fvInstanceRisks.addElement(attrSafety); m_fvInstanceRisks.addElement(attrEFC); m_fvInstanceRisks.addElement(attrSR); m_fvInstanceRisks.addElement(attrEC); m_fvInstanceRisks.addElement(attrCSSF); m_fvInstanceRisks.addElement(attrRisk); m_fvInstanceActions = new FastVector(7); m_fvInstanceActions.addElement(attrLO); m_fvInstanceActions.addElement(attrSafety); m_fvInstanceActions.addElement(attrEFC); m_fvInstanceActions.addElement(attrSR); m_fvInstanceActions.addElement(attrEC); m_fvInstanceActions.addElement(attrCSSF); m_fvInstanceActions.addElement(attrAction); Instances dataRisk = new Instances("A-Risk-instance-to-classify", m_fvInstanceRisks, 0); Instances dataAction = new Instances("An-Action-instance-to-classify", m_fvInstanceActions, 0); double[] riskValues = new double[dataRisk.numAttributes()]; double[] actionValues = new double[dataRisk.numAttributes()]; String strLO = (String) m_cmbLO.getSelectedItem(); String strSafety = (String) m_cmbSafety.getSelectedItem(); String strEFC = (String) m_cmbEFC.getSelectedItem(); String strSR = (String) m_cmbSR.getSelectedItem(); String strEC = (String) m_cmbEC.getSelectedItem(); String strCSSF = (String) m_cmbCSSF.getSelectedItem(); Instance instRisk = new DenseInstance(7); Instance instAction = new DenseInstance(7); if (strLO.equals("-- none --")) { instRisk.setMissing(0); instAction.setMissing(0); } else { instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(0), strLO); instAction.setValue((Attribute) m_fvInstanceActions.elementAt(0), strLO); } if (strSafety.equals("-- none --")) { instRisk.setMissing(1); instAction.setMissing(1); } else { instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(1), strSafety); instAction.setValue((Attribute) m_fvInstanceActions.elementAt(1), strSafety); } if (strEFC.equals("-- none --")) { instRisk.setMissing(2); instAction.setMissing(2); } else { instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(2), strEFC); instAction.setValue((Attribute) m_fvInstanceActions.elementAt(2), strEFC); } if (strSR.equals("-- none --")) { instRisk.setMissing(3); instAction.setMissing(3); } else { instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(3), strSR); instAction.setValue((Attribute) m_fvInstanceActions.elementAt(3), strSR); } if (strEC.equals("-- none --")) { instRisk.setMissing(4); instAction.setMissing(4); } else { instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(4), strEC); instAction.setValue((Attribute) m_fvInstanceActions.elementAt(4), strEC); } if (strCSSF.equals("-- none --")) { instRisk.setMissing(5); instAction.setMissing(5); } else { instAction.setValue((Attribute) m_fvInstanceActions.elementAt(5), strCSSF); instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(5), strCSSF); } instRisk.setMissing(6); instAction.setMissing(6); dataRisk.add(instRisk); instRisk.setDataset(dataRisk); dataRisk.setClassIndex(dataRisk.numAttributes() - 1); dataAction.add(instAction); instAction.setDataset(dataAction); dataAction.setClassIndex(dataAction.numAttributes() - 1); System.out.println("Instance Resiko: " + dataRisk.instance(0)); System.out.println("\tNum Attributes : " + dataRisk.numAttributes()); System.out.println("\tNum instances : " + dataRisk.numInstances()); System.out.println("Instance Action: " + dataAction.instance(0)); System.out.println("\tNum Attributes : " + dataAction.numAttributes()); System.out.println("\tNum instances : " + dataAction.numInstances()); int classIndexRisk = 0; int classIndexAction = 0; String strClassRisk = null; String strClassAction = null; try { //classIndexRisk = (int) m_treeResiko.classifyInstance(dataRisk.instance(0)); classIndexRisk = (int) m_treeResiko.classifyInstance(instRisk); classIndexAction = (int) m_treeAksi.classifyInstance(instAction); } catch (Exception ex) { Logger.getLogger(MatResUI.class.getName()).log(Level.SEVERE, null, ex); } strClassRisk = (String) fvRisk.elementAt(classIndexRisk); strClassAction = (String) fvAction.elementAt(classIndexAction); System.out.println("[Risk Class Index: " + classIndexRisk + " Class Label: " + strClassRisk + "]"); System.out.println("[Action Class Index: " + classIndexAction + " Class Label: " + strClassAction + "]"); if (strClassRisk != null) { m_txtRisk.setText(strClassRisk); } double[] riskDist = null; double[] actionDist = null; try { riskDist = m_nbResiko.distributionForInstance(dataRisk.instance(0)); actionDist = m_nbAksi.distributionForInstance(dataAction.instance(0)); String strProb; // set up RISK progress bars m_jBarRiskLow.setValue((int) (100 * riskDist[0])); m_jBarRiskLow.setString(String.format("%6.3f%%", 100 * riskDist[0])); m_jBarRiskModerate.setValue((int) (100 * riskDist[1])); m_jBarRiskModerate.setString(String.format("%6.3f%%", 100 * riskDist[1])); m_jBarRiskHigh.setValue((int) (100 * riskDist[2])); m_jBarRiskHigh.setString(String.format("%6.3f%%", 100 * riskDist[2])); m_jBarRiskExtreme.setValue((int) (100 * riskDist[3])); m_jBarRiskExtreme.setString(String.format("%6.3f%%", 100 * riskDist[3])); } catch (Exception ex) { Logger.getLogger(MatResUI.class.getName()).log(Level.SEVERE, null, ex); } double predictedProb = 0.0; String predictedClass = ""; // Loop over all the prediction labels in the distribution. for (int predictionDistributionIndex = 0; predictionDistributionIndex < riskDist.length; predictionDistributionIndex++) { // Get this distribution index's class label. String predictionDistributionIndexAsClassLabel = dataRisk.classAttribute() .value(predictionDistributionIndex); int classIndex = dataRisk.classAttribute().indexOfValue(predictionDistributionIndexAsClassLabel); // Get the probability. double predictionProbability = riskDist[predictionDistributionIndex]; if (predictionProbability > predictedProb) { predictedProb = predictionProbability; predictedClass = predictionDistributionIndexAsClassLabel; } System.out.printf("[%2d %10s : %6.3f]", classIndex, predictionDistributionIndexAsClassLabel, predictionProbability); } m_txtRiskNB.setText(predictedClass); }