Example usage for weka.core Instance setMissing

Introduction

In this page you can find the example usage for weka.core Instance setMissing.

Prototype

public void setMissing(Attribute att);

Source Link

Document

Sets a specific value to be "missing".

Usage

From source file:edu.brandeis.wisedb.scheduler.training.decisiontree.DTSearcher.java

License:Open Source License

@Override
public List<Action> schedule(Set<ModelQuery> toSched) {
    SingularMachineState start = new SingularMachineState(toSched, qtp, sla);
    List<Action> toR = new LinkedList<Action>();

    applyLoop: while (!start.isGoalState()) {
        log.fine("Current state: " + start);

        SortedMap<String, String> features = start.getFeatures();
        Instance toClassify = new Instance(attributes.length);
        toClassify.setDataset(wekaDataSet);

        for (Attribute a : attributes) {
            if (a.name().equals("action")) {
                //toClassify.setValue(a, "N");
                continue;
            }//from w ww  .ja va2  s  . com

            try {

                if (features.get(a.name()).equals("?")) {
                    toClassify.setMissing(a);
                    continue;
                }
                try {
                    double d = Double.valueOf(features.get(a.name()));
                    toClassify.setValue(a, d);
                } catch (NumberFormatException e) {
                    toClassify.setValue(a, features.get(a.name()));
                }
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
                log.warning(
                        "Encountered previously unseen attribute value! Might need better training data... making random selection.");
                log.warning("Value for attribute " + a.name() + " was " + features.get(a.name()));
                Action rand = getPUAction(start);
                log.warning("Random action selected: " + rand);
                toR.add(rand);
                start.applyAction(rand);
                continue applyLoop;
            }
        }

        toClassify.setClassMissing();
        log.finer("Going to classify: " + toClassify);

        try {
            double d = tree.classifyInstance(toClassify);
            toClassify.setClassValue(d);
            String action = toClassify.stringValue(toClassify.classIndex());
            log.finer("Got action string: " + action);

            Action selected = null;
            for (Action a : start.getPossibleActions()) {
                if (actionMatches(a, action)) {
                    selected = a;
                    break;
                }
            }

            if (selected == null) {
                //log.warning("Could not find applicable action for string: " + action + " ... picking random action");
                Action a = getPUAction(start);
                start.applyAction(a);
                toR.add(a);
                continue;
            }

            log.fine("Selected action: " + selected);

            start.applyAction(selected);

            toR.add(selected);

        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            return null;
        }
    }

    return toR;
}

From source file:elh.eus.absa.Features.java

License:Open Source License

/**
 *   Function fills the attribute vectors for the instances existing in the corpus given. 
 *   Attribute vectors contain the features loaded by the creatFeatureSet() function.
 * /*from   w  w  w. ja va 2 s.c om*/
 * @param boolean save : whether the Instances file should be saved to an arff file or not.
 * @return Weka Instances object containing the attribute vectors filled with the features specified
 *          in the parameter file.
 */
public Instances loadInstances(boolean save, String prefix) throws IOException {
    String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_"
            + prefix;
    HashMap<String, Opinion> trainExamples = corpus.getOpinions();

    int trainExamplesNum = trainExamples.size();

    int bowWin = 0;
    if (params.containsKey("window")) {
        bowWin = Integer.parseInt(params.getProperty("window"));
        savePath = savePath + "_w" + bowWin;
    }

    //Properties posProp = new Properties();
    //eus.ixa.ixa.pipe.pos.Annotate postagger = new eus.ixa.ixa.pipe.pos.Annotate(posProp);      
    if (params.containsKey("lemmaNgrams")) {
        Properties posProp = NLPpipelineWrapper.setPostaggerProperties(params.getProperty("pos-model"),
                params.getProperty("lemma-model"), corpus.getLang(), "bin", "false");

        postagger = new eus.ixa.ixa.pipe.pos.Annotate(posProp);
    }

    //System.out.println("train examples: "+trainExamplesNum);
    //Create the Weka object for the training set
    Instances rsltdata = new Instances("train", atts, trainExamplesNum);

    // setting class attribute (last attribute in train data.
    //traindata.setClassIndex(traindata.numAttributes() - 1);

    System.err.println("Features: loadInstances() - featNum: " + this.featNum + " - trainset attrib num -> "
            + rsltdata.numAttributes() + " - ");
    System.out.println("Features: loadInstances() - featNum: " + this.featNum + " - trainset attrib num -> "
            + rsltdata.numAttributes() + " - ");

    int instId = 1;
    // fill the vectors for each training example
    for (String oId : trainExamples.keySet()) {
        //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId()));

        //value vector
        double[] values = new double[featNum];

        // first element is the instanceId         
        values[rsltdata.attribute("instanceId").index()] = instId;

        // string normalization (emoticons, twitter grammar,...)
        String opNormalized = corpus.getOpinionSentence(oId);

        // compute uppercase ratio before normalization (if needed)      
        double upRatio = 0.0;
        if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes")) {
            String upper = opNormalized.replaceAll("[\\p{Ll}]", "");
            upRatio = (double) upper.length() / (double) opNormalized.length();
            values[rsltdata.attribute("upperCaseRation").index()] = upRatio;
        }

        // string normalization (emoticons, twitter grammar,...)
        if ((params.containsKey("wfngrams") || params.containsKey("lemmaNgrams"))
                && (!params.getProperty("normalization", "none").equalsIgnoreCase("noEmot"))) {
            opNormalized = normalize(opNormalized, params.getProperty("normalization", "none"));
        }

        //process the current instance with the NLP pipeline in order to get token and lemma|pos features
        KAFDocument nafinst = new KAFDocument("", "");
        String nafname = trainExamples.get(oId).getsId().replace(':', '_');
        String nafDir = params.getProperty("kafDir");
        String nafPath = nafDir + File.separator + nafname + ".kaf";
        //counter for opinion sentence token number. Used for computing relative values of the features
        int tokNum = 1;
        try {
            if (params.containsKey("lemmaNgrams")) //(lemmaNgrams != null) && (!lemmaNgrams.isEmpty()))
            {
                if (FileUtilsElh.checkFile(nafPath)) {
                    nafinst = KAFDocument.createFromFile(new File(nafPath));
                } else {
                    nafinst = NLPpipelineWrapper.ixaPipesTokPos(opNormalized, corpus.getLang(),
                            params.getProperty("pos-model"), params.getProperty("lemma-model"), postagger);
                    Files.createDirectories(Paths.get(nafDir));
                    nafinst.save(nafPath);
                }
                tokNum = nafinst.getWFs().size();
                //System.err.println("Features::loadInstances - postagging opinion sentence ("+oId+") - "+corpus.getOpinionSentence(oId));
            } else {
                if (FileUtilsElh.checkFile(nafPath)) {
                    nafinst = KAFDocument.createFromFile(new File(nafPath));
                } else {
                    nafinst = NLPpipelineWrapper.ixaPipesTok(opNormalized, corpus.getLang());
                }
                tokNum = nafinst.getWFs().size();
                //System.err.println("Features::loadInstances - tokenizing opinion sentence ("+oId+") - "+corpus.getOpinionSentence(oId));

            }
        } catch (IOException | JDOMException e) {
            System.err.println("Features::loadInstances() - error when NLP processing the instance " + instId
                    + "|" + oId + ") for filling the attribute vector");
            e.printStackTrace();
            System.exit(5);
        }

        LinkedList<String> ngrams = new LinkedList<String>();
        int ngramDim;
        try {
            ngramDim = Integer.valueOf(params.getProperty("wfngrams"));
        } catch (Exception e) {
            ngramDim = 0;
        }

        boolean polNgrams = false;
        if (params.containsKey("polNgrams")) {
            polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes");
        }

        List<WF> window = nafinst.getWFs();
        Integer end = corpus.getOpinion(oId).getTo();
        // apply window if window active (>0) and if the target is not null (to=0)
        if ((bowWin > 0) && (end > 0)) {
            Integer start = corpus.getOpinion(oId).getFrom();
            Integer to = window.size();
            Integer from = 0;
            end++;
            for (int i = 0; i < window.size(); i++) {
                WF wf = window.get(i);
                if ((wf.getOffset() == start) && (i >= bowWin)) {
                    from = i - bowWin;
                } else if (wf.getOffset() >= end) {
                    if (i + bowWin < window.size()) {
                        to = i + bowWin;
                    }
                    break;
                }
            }
            window = window.subList(from, to);
            //System.out.println("startTgt: "+start+" - from: "+from+" | endTrgt:"+(end-1)+" - to:"+to);
        }

        //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+
        //      "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n");

        List<String> windowWFIds = new ArrayList<String>();

        // word form ngram related features
        for (WF wf : window) {
            windowWFIds.add(wf.getId());

            String wfStr = wf.getForm();
            if (params.containsKey("wfngrams") && ngramDim > 0) {
                if (!savePath.contains("_wf" + ngramDim)) {
                    savePath = savePath + "_wf" + ngramDim;
                }
                //if the current word form is in the ngram list activate the feature in the vector
                if (ngrams.size() >= ngramDim) {
                    ngrams.removeFirst();
                }
                ngrams.add(wfStr);

                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "wf", 1, false); //toknum

            }
            // Clark cluster info corresponding to the current word form
            if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) {
                if (!savePath.contains("_cl")) {
                    savePath = savePath + "_cl";
                }
                values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++;
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) {
                if (!savePath.contains("_br")) {
                    savePath = savePath + "_br";
                }
                values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++;
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) {
                if (!savePath.contains("_w2v")) {
                    savePath = savePath + "_w2v";
                }
                values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++;
            }

        }

        //empty ngram list and add remaining ngrams to the feature list
        checkNgramFeatures(ngrams, values, "wf", 1, true); //toknum

        // PoS tagger related attributes: lemmas and pos tags
        if (params.containsKey("lemmaNgrams")
                || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0"))
                || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) {
            ngrams = new LinkedList<String>();
            if (params.containsKey("lemmaNgrams")
                    && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) {
                ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams"));
            } else {
                ngramDim = 3;
            }
            LinkedList<String> posNgrams = new LinkedList<String>();
            int posNgramDim = 0;
            if (params.containsKey("pos")) {
                posNgramDim = Integer.valueOf(params.getProperty("pos"));
            }

            for (Term t : nafinst.getTermsFromWFs(windowWFIds)) {
                //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))
                if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral")
                        || params.containsKey("polarLexiconDomain")) {
                    if (!savePath.contains("_l" + ngramDim)) {
                        savePath = savePath + "_l" + ngramDim;
                    }

                    String lemma = t.getLemma();

                    if (ngrams.size() >= ngramDim) {
                        ngrams.removeFirst();
                    }
                    ngrams.add(lemma);

                    // add ngrams to the feature vector
                    for (int i = 0; i < ngrams.size(); i++) {
                        String ng = featureFromArray(ngrams.subList(0, i + 1), "lemma");
                        //if the current lemma is in the ngram list activate the feature in the vector
                        if (params.containsKey("lemmaNgrams")
                                && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) {
                            Attribute ngAtt = rsltdata.attribute(ng);
                            if (ngAtt != null) {
                                addNumericToFeatureVector(ng, values, 1); //tokNum                     
                            }
                        }

                        ng = featureFromArray(ngrams.subList(0, i + 1), "");
                        if (params.containsKey("polarLexiconGeneral")
                                || params.containsKey("polarLexiconDomain")) {
                            checkPolarityLexicons(ng, values, tokNum, polNgrams);
                        } //end polarity ngram checker
                    } //end ngram checking                                      
                }
                //pos tags
                if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) {
                    if (!savePath.contains("_p")) {
                        savePath = savePath + "_p";
                    }

                    if (posNgrams.size() >= posNgramDim) {
                        posNgrams.removeFirst();
                    }
                    posNgrams.add(t.getPos());

                    // add ngrams to the feature vector
                    checkNgramFeatures(posNgrams, values, "pos", 1, false);
                }
            } //endFor

            //empty ngram list and add remaining ngrams to the feature list
            while (!ngrams.isEmpty()) {
                String ng = featureFromArray(ngrams, "lemma");

                //if the current lemma is in the ngram list activate the feature in the vector
                if (rsltdata.attribute(ng) != null) {
                    addNumericToFeatureVector(ng, values, 1); //tokNum
                }

                // polarity lexicons
                if (params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) {
                    checkPolarityLexicons(ng, values, tokNum, polNgrams);
                } //end polarity ngram checker

                ngrams.removeFirst();
            }

            //empty pos ngram list and add remaining pos ngrams to the feature list
            checkNgramFeatures(posNgrams, values, "pos", 1, true);

        }

        // add sentence length as a feature
        if (params.containsKey("sentenceLength")
                && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) {
            values[rsltdata.attribute("sentenceLength").index()] = tokNum;
        }

        //create object for the current instance and associate it with the current train dataset.         
        Instance inst = new SparseInstance(1.0, values);
        inst.setDataset(rsltdata);

        // add category attributte values
        String cat = trainExamples.get(oId).getCategory();

        if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) {
            if (cat.compareTo("NULL") == 0) {
                inst.setValue(rsltdata.attribute("entCat").index(), cat);
                inst.setValue(rsltdata.attribute("attCat").index(), cat);
            } else {
                String[] splitCat = cat.split("#");
                inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]);
                inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]);
            }

            //inst.setValue(attIndexes.get("entAttCat"), cat);
        } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) {
            inst.setValue(rsltdata.attribute("entAttCat").index(), cat);
        }

        if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) {
            // add class value as a double (Weka stores all values as doubles )
            String pol = normalizePolarity(trainExamples.get(oId).getPolarity());
            //System.err.println("Features::loadInstances - pol "+pol+" for oid "+oId+" - text:"+corpus.getOpinionSentence(oId));
            if (pol != null && !pol.isEmpty()) {
                //System.err.println("polarity: _"+pol+"_");
                inst.setValue(rsltdata.attribute("polarityCat"), pol);
            } else {
                inst.setMissing(rsltdata.attribute("polarityCat"));
            }
        }

        //add instance to train data
        rsltdata.add(inst);

        //store opinion Id and instance Id
        this.opInst.put(oId, instId);
        instId++;
    }

    System.err.println("Features : loadInstances() - training data ready total number of examples -> "
            + trainExamplesNum + " - " + rsltdata.numInstances());

    if (save) {
        try {
            savePath = savePath + ".arff";
            System.err.println("arff written to: " + savePath);
            ArffSaver saver = new ArffSaver();

            saver.setInstances(rsltdata);

            saver.setFile(new File(savePath));
            saver.writeBatch();
        } catch (IOException e1) {
            e1.printStackTrace();
        } catch (Exception e2) {
            e2.printStackTrace();
        }
    }
    return rsltdata;
}

From source file:elh.eus.absa.Features.java

License:Open Source License

/**
 *   Function fills the attribute vectors for the instances existing in the Conll tabulated formatted corpus given. 
 *   Attribute vectors contain the features loaded by the creatFeatureSet() function.
 * /* w  w w. ja  v a  2 s.  com*/
 * @param boolean save : whether the Instances file should be saved to an arff file or not.
 * @return Weka Instances object containing the attribute vectors filled with the features specified
 *          in the parameter file.
 */
public Instances loadInstancesTAB(boolean save, String prefix) {
    String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_"
            + prefix;
    HashMap<String, Opinion> trainExamples = corpus.getOpinions();

    int trainExamplesNum = trainExamples.size();

    int bowWin = 0;
    if (params.containsKey("window")) {
        bowWin = Integer.parseInt(params.getProperty("window"));
        savePath = savePath + "_w" + bowWin;
    }

    //System.out.println("train examples: "+trainExamplesNum);
    //Create the Weka object for the training set
    Instances rsltdata = new Instances("train", atts, trainExamplesNum);

    // setting class attribute (last attribute in train data.
    //traindata.setClassIndex(traindata.numAttributes() - 1);

    System.err.println("Features: loadInstancesTAB() - featNum: " + this.featNum + " - trainset attrib num -> "
            + rsltdata.numAttributes() + " - ");
    System.out.println("Features: loadInstancesTAB() - featNum: " + this.featNum + " - trainset attrib num -> "
            + rsltdata.numAttributes() + " - ");

    int instId = 1;
    // fill the vectors for each training example
    for (String oId : trainExamples.keySet()) {
        //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId()));

        //value vector
        double[] values = new double[featNum];

        // first element is the instanceId         
        values[rsltdata.attribute("instanceId").index()] = instId;

        LinkedList<String> ngrams = new LinkedList<String>();
        int ngramDim;
        try {
            ngramDim = Integer.valueOf(params.getProperty("wfngrams"));
        } catch (Exception e) {
            ngramDim = 0;
        }

        boolean polNgrams = false;
        if (params.containsKey("polNgrams")) {
            polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes");
        }

        String[] noWindow = corpus.getOpinionSentence(oId).split("\n");

        //counter for opinion sentence token number. Used for computing relative values of the features
        int tokNum = noWindow.length;

        List<String> window = Arrays.asList(noWindow);
        Integer end = corpus.getOpinion(oId).getTo();
        // apply window if window active (>0) and if the target is not null (to=0)
        if ((bowWin > 0) && (end > 0)) {
            Integer start = corpus.getOpinion(oId).getFrom();
            Integer from = start - bowWin;
            if (from < 0) {
                from = 0;
            }
            Integer to = end + bowWin;
            if (to > noWindow.length - 1) {
                to = noWindow.length - 1;
            }
            window = Arrays.asList(Arrays.copyOfRange(noWindow, from, to));
        }

        //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+
        //      "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n");

        //System.err.println(Arrays.toString(window.toArray()));

        // word form ngram related features
        for (String wf : window) {
            String[] fields = wf.split("\t");
            String wfStr = normalize(fields[0], params.getProperty("normalization", "none"));
            // blank line means we found a sentence end. Empty n-gram list and reiniciate.  
            if (wf.equals("")) {
                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, true); //toknum

                // since wf is empty no need to check for clusters and other features.
                continue;
            }

            if (params.containsKey("wfngrams") && ngramDim > 0) {
                if (!savePath.contains("_wf" + ngramDim)) {
                    savePath = savePath + "_wf" + ngramDim;
                }
                //if the current word form is in the ngram list activate the feature in the vector
                if (ngrams.size() >= ngramDim) {
                    ngrams.removeFirst();
                }
                ngrams.add(wfStr);

                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, false); //toknum
            }
            // Clark cluster info corresponding to the current word form
            if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) {
                if (!savePath.contains("_cl")) {
                    savePath = savePath + "_cl";
                }
                values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++;
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) {
                if (!savePath.contains("_br")) {
                    savePath = savePath + "_br";
                }
                values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++;
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) {
                if (!savePath.contains("_w2v")) {
                    savePath = savePath + "_w2v";
                }
                values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++;
            }

        }

        //empty ngram list and add remaining ngrams to the feature list
        checkNgramFeatures(ngrams, values, "", 1, true); //toknum

        // PoS tagger related attributes: lemmas and pos tags
        if (params.containsKey("lemmaNgrams")
                || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0"))
                || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) {
            ngrams = new LinkedList<String>();
            if (params.containsKey("lemmaNgrams")
                    && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) {
                ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams"));
            } else {
                ngramDim = 3;
            }
            LinkedList<String> posNgrams = new LinkedList<String>();
            int posNgramDim = 0;
            if (params.containsKey("pos")) {
                posNgramDim = Integer.valueOf(params.getProperty("pos"));
            }

            for (String t : window) {
                //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))
                if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral")
                        || params.containsKey("polarLexiconDomain")) {
                    if (!savePath.contains("_l" + ngramDim)) {
                        savePath = savePath + "_l" + ngramDim;
                    }

                    //blank line means we found a sentence end. Empty n-gram list and reiniciate.
                    if (t.equals("")) {
                        // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                        checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, true, polNgrams); //toknum

                        // since t is empty no need to check for clusters and other features.
                        continue;
                    }

                    String[] fields = t.split("\t");
                    if (fields.length < 2) {
                        continue;
                    }
                    String lemma = normalize(fields[1], params.getProperty("normalization", "none"));

                    if (ngrams.size() >= ngramDim) {
                        ngrams.removeFirst();
                    }
                    ngrams.add(lemma);

                    // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                    checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, false, polNgrams);

                }

                //pos tags
                if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) {
                    if (!savePath.contains("_p")) {
                        savePath = savePath + "_p";
                    }

                    if (posNgrams.size() >= posNgramDim) {
                        posNgrams.removeFirst();
                    }

                    String[] fields = t.split("\t");
                    if (fields.length < 3) {
                        continue;
                    }
                    String pos = fields[2];

                    posNgrams.add(pos);

                    // add ngrams to the feature vector
                    checkNgramFeatures(posNgrams, values, "pos", 1, false);
                }
            } //endFor

            //empty ngram list and add remaining ngrams to the feature list
            // check both lemma n-grams and polarity lexicons, and add values to the feature vector
            checkNgramsAndPolarLexicons(ngrams, values, "", 1, tokNum, true, polNgrams);

            //empty pos ngram list and add remaining pos ngrams to the feature list
            checkNgramFeatures(posNgrams, values, "pos", 1, true);

        }

        // add sentence length as a feature
        if (params.containsKey("sentenceLength")
                && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) {
            values[rsltdata.attribute("sentenceLength").index()] = tokNum;
        }

        // compute uppercase ratio before normalization (if needed)      
        //double upRatio =0.0;
        //if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes"))
        //{
        //   String upper = opNormalized.replaceAll("[a-z]", "");
        //   upRatio = (double)upper.length() / (double)opNormalized.length();
        //   values[rsltdata.attribute("upperCaseRation").index()] = upRatio;
        //}

        //create object for the current instance and associate it with the current train dataset.         
        Instance inst = new SparseInstance(1.0, values);
        inst.setDataset(rsltdata);

        // add category attributte values
        String cat = trainExamples.get(oId).getCategory();

        if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) {
            if (cat.compareTo("NULL") == 0) {
                inst.setValue(rsltdata.attribute("entCat").index(), cat);
                inst.setValue(rsltdata.attribute("attCat").index(), cat);
            } else {
                String[] splitCat = cat.split("#");
                inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]);
                inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]);
            }

            //inst.setValue(attIndexes.get("entAttCat"), cat);
        } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) {
            inst.setValue(rsltdata.attribute("entAttCat").index(), cat);
        }

        if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) {
            // add class value as a double (Weka stores all values as doubles )
            String pol = normalizePolarity(trainExamples.get(oId).getPolarity());
            if (pol != null && !pol.isEmpty()) {
                inst.setValue(rsltdata.attribute("polarityCat"), pol);
            } else {
                //System.err.println("polarity: _"+pol+"_");
                inst.setMissing(rsltdata.attribute("polarityCat"));
            }
        }

        //add instance to train data
        rsltdata.add(inst);

        //store opinion Id and instance Id
        this.opInst.put(oId, instId);
        instId++;
    }

    System.err.println("Features : loadInstancesTAB() - training data ready total number of examples -> "
            + trainExamplesNum + " - " + rsltdata.numInstances());

    if (save) {
        try {
            savePath = savePath + ".arff";
            System.err.println("arff written to: " + savePath);
            ArffSaver saver = new ArffSaver();

            saver.setInstances(rsltdata);

            saver.setFile(new File(savePath));
            saver.writeBatch();
        } catch (IOException e1) {
            e1.printStackTrace();
        } catch (Exception e2) {
            e2.printStackTrace();
        }
    }

    return rsltdata;
}

From source file:elh.eus.absa.Features.java

License:Open Source License

/**
 *   Function fills the attribute vectors for the instances existing in the Conll tabulated formatted corpus given. 
 *   Attribute vectors contain the features loaded by the creatFeatureSet() function.
 * /*from   ww w .  j a va 2 s . c  om*/
 * @param boolean save : whether the Instances file should be saved to an arff file or not.
 * @return Weka Instances object containing the attribute vectors filled with the features specified
 *          in the parameter file.
 */
public Instances loadInstancesConll(boolean save, String prefix) {
    String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_"
            + prefix;
    HashMap<String, Opinion> trainExamples = corpus.getOpinions();

    String nafdir = params.getProperty("kafDir");
    int trainExamplesNum = trainExamples.size();

    int bowWin = 0;
    if (params.containsKey("window")) {
        bowWin = Integer.parseInt(params.getProperty("window"));
        savePath = savePath + "_w" + bowWin;
    }

    //System.out.println("train examples: "+trainExamplesNum);
    //Create the Weka object for the training set
    Instances rsltdata = new Instances("train", atts, trainExamplesNum);

    // setting class attribute (last attribute in train data.
    //traindata.setClassIndex(traindata.numAttributes() - 1);

    System.err.println("Features: loadInstancesConll() - featNum: " + this.featNum
            + " - trainset attrib num -> " + rsltdata.numAttributes() + " - ");
    System.out.println("Features: loadInstancesConll() - featNum: " + this.featNum
            + " - trainset attrib num -> " + rsltdata.numAttributes() + " - ");

    int instId = 1;
    // fill the vectors for each training example
    for (String oId : trainExamples.keySet()) {
        //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId()));

        //value vector
        double[] values = new double[featNum];

        // first element is the instanceId         
        values[rsltdata.attribute("instanceId").index()] = instId;

        LinkedList<String> ngrams = new LinkedList<String>();
        int ngramDim;
        try {
            ngramDim = Integer.valueOf(params.getProperty("wfngrams"));
        } catch (Exception e) {
            ngramDim = 0;
        }

        boolean polNgrams = false;
        if (params.containsKey("polNgrams")) {
            polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes");
        }

        String nafPath = nafdir + File.separator + trainExamples.get(oId).getsId().replace(':', '_');
        String taggedFile = "";
        try {
            if (!FileUtilsElh.checkFile(nafPath + ".kaf")) {
                nafPath = NLPpipelineWrapper.tagSentence(corpus.getOpinionSentence(oId), nafPath,
                        corpus.getLang(), params.getProperty("pos-model"), params.getProperty("lemma-model"),
                        postagger);
            } else {
                nafPath = nafPath + ".kaf";
            }
            InputStream reader = new FileInputStream(new File(nafPath));
            taggedFile = IOUtils.toString(reader);
            reader.close();
        } catch (IOException | JDOMException fe) {
            // TODO Auto-generated catch block
            fe.printStackTrace();
        }

        String[] noWindow = taggedFile.split("\n");

        //counter for opinion sentence token number. Used for computing relative values of the features
        int tokNum = noWindow.length;

        //System.err.println("Features::loadInstancesConll - tagged File read lines:"+tokNum);

        List<String> window = Arrays.asList(noWindow);
        Integer end = corpus.getOpinion(oId).getTo();
        // apply window if window active (>0) and if the target is not null (to=0)
        if ((bowWin > 0) && (end > 0)) {
            Integer start = corpus.getOpinion(oId).getFrom();
            Integer from = start - bowWin;
            if (from < 0) {
                from = 0;
            }
            Integer to = end + bowWin;
            if (to > noWindow.length - 1) {
                to = noWindow.length - 1;
            }
            window = Arrays.asList(Arrays.copyOfRange(noWindow, from, to));
        }

        //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+
        //      "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n");

        //System.err.println(Arrays.toString(window.toArray()));

        // word form ngram related features
        for (String wf : window) {
            String[] fields = wf.split("\\s");
            String wfStr = normalize(fields[0], params.getProperty("normalization", "none"));
            // blank line means we found a sentence end. Empty n-gram list and reiniciate.  
            if (wf.equals("")) {
                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, true); //toknum

                // since wf is empty no need to check for clusters and other features.
                continue;
            }

            if (params.containsKey("wfngrams") && ngramDim > 0) {
                if (!savePath.contains("_wf" + ngramDim)) {
                    savePath = savePath + "_wf" + ngramDim;
                }
                //if the current word form is in the ngram list activate the feature in the vector
                if (ngrams.size() >= ngramDim) {
                    ngrams.removeFirst();
                }
                ngrams.add(wfStr);

                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, false); //toknum
            }
            // Clark cluster info corresponding to the current word form
            if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) {
                if (!savePath.contains("_cl")) {
                    savePath = savePath + "_cl";
                }
                values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++;
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) {
                if (!savePath.contains("_br")) {
                    savePath = savePath + "_br";
                }
                values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++;
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) {
                if (!savePath.contains("_w2v")) {
                    savePath = savePath + "_w2v";
                }
                values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++;
            }

        }

        //empty ngram list and add remaining ngrams to the feature list
        checkNgramFeatures(ngrams, values, "", 1, true); //toknum

        // PoS tagger related attributes: lemmas and pos tags
        if (params.containsKey("lemmaNgrams")
                || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0"))
                || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) {
            ngrams = new LinkedList<String>();
            if (params.containsKey("lemmaNgrams")
                    && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) {
                ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams"));
            } else {
                ngramDim = 3;
            }
            LinkedList<String> posNgrams = new LinkedList<String>();
            int posNgramDim = 0;
            if (params.containsKey("pos")) {
                posNgramDim = Integer.valueOf(params.getProperty("pos"));
            }

            for (String t : window) {
                //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))
                if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral")
                        || params.containsKey("polarLexiconDomain")) {
                    if (!savePath.contains("_l" + ngramDim)) {
                        savePath = savePath + "_l" + ngramDim;
                    }

                    //blank line means we found a sentence end. Empty n-gram list and reiniciate.
                    if (t.equals("")) {
                        // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                        checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, true, polNgrams); //toknum

                        // since t is empty no need to check for clusters and other features.
                        continue;
                    }

                    String[] fields = t.split("\\s");
                    if (fields.length < 2) {
                        continue;
                    }
                    String lemma = normalize(fields[1], params.getProperty("normalization", "none"));

                    if (ngrams.size() >= ngramDim) {
                        ngrams.removeFirst();
                    }
                    ngrams.add(lemma);

                    // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                    checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, false, polNgrams);

                }

                //pos tags
                if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) {
                    if (!savePath.contains("_p")) {
                        savePath = savePath + "_p";
                    }

                    if (posNgrams.size() >= posNgramDim) {
                        posNgrams.removeFirst();
                    }

                    String[] fields = t.split("\\s");
                    if (fields.length < 3) {
                        continue;
                    }
                    String pos = fields[2];

                    posNgrams.add(pos);

                    // add ngrams to the feature vector
                    checkNgramFeatures(posNgrams, values, "pos", 1, false);
                }
            } //endFor

            //empty ngram list and add remaining ngrams to the feature list
            // check both lemma n-grams and polarity lexicons, and add values to the feature vector
            checkNgramsAndPolarLexicons(ngrams, values, "", 1, tokNum, true, polNgrams);

            //empty pos ngram list and add remaining pos ngrams to the feature list
            checkNgramFeatures(posNgrams, values, "pos", 1, true);

        }

        // add sentence length as a feature
        if (params.containsKey("sentenceLength")
                && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) {
            values[rsltdata.attribute("sentenceLength").index()] = tokNum;
        }

        // compute uppercase ratio before normalization (if needed)      
        //double upRatio =0.0;
        //if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes"))
        //{
        //   String upper = opNormalized.replaceAll("[a-z]", "");
        //   upRatio = (double)upper.length() / (double)opNormalized.length();
        //   values[rsltdata.attribute("upperCaseRation").index()] = upRatio;
        //}

        //create object for the current instance and associate it with the current train dataset.         
        Instance inst = new SparseInstance(1.0, values);
        inst.setDataset(rsltdata);

        // add category attributte values
        String cat = trainExamples.get(oId).getCategory();

        if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) {
            if (cat.compareTo("NULL") == 0) {
                inst.setValue(rsltdata.attribute("entCat").index(), cat);
                inst.setValue(rsltdata.attribute("attCat").index(), cat);
            } else {
                String[] splitCat = cat.split("#");
                inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]);
                inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]);
            }

            //inst.setValue(attIndexes.get("entAttCat"), cat);
        } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) {
            inst.setValue(rsltdata.attribute("entAttCat").index(), cat);
        }

        if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) {
            // add class value as a double (Weka stores all values as doubles )
            String pol = normalizePolarity(trainExamples.get(oId).getPolarity());
            if (pol != null && !pol.isEmpty()) {
                inst.setValue(rsltdata.attribute("polarityCat"), pol);
            } else {
                //System.err.println("polarity: _"+pol+"_");
                inst.setMissing(rsltdata.attribute("polarityCat"));
            }
        }

        //add instance to train data
        rsltdata.add(inst);

        //store opinion Id and instance Id
        this.opInst.put(oId, instId);
        instId++;
    }

    System.err.println("Features : loadInstancesConll() - training data ready total number of examples -> "
            + trainExamplesNum + " - " + rsltdata.numInstances());

    if (save) {
        try {
            savePath = savePath + ".arff";
            System.err.println("arff written to: " + savePath);
            ArffSaver saver = new ArffSaver();

            saver.setInstances(rsltdata);

            saver.setFile(new File(savePath));
            saver.writeBatch();
        } catch (IOException e1) {
            e1.printStackTrace();
        } catch (Exception e2) {
            e2.printStackTrace();
        }
    }

    return rsltdata;
}

From source file:gnusmail.filters.SingleAttFilter.java

License:Open Source License

@Override
public void updateInstance(Instance inst, Document document) {
    try {/*from w w w .  j  a va  2  s . c o m*/
        int index = attribute.indexOfValue(getSingleValue(document));
        inst.setValue(attribute, index);
    } catch (MessagingException e) {
        inst.setMissing(attribute);
    }
}

From source file:gnusmail.filters.SingleNumericAttFilter.java

License:Open Source License

@Override
public void updateInstance(Instance inst, Document doc) {
    try {//from  ww w  . j ava2 s .  c o  m
        inst.setValue(attribute, getSingleValue(doc));
    } catch (MessagingException e) {
        inst.setMissing(attribute);
    }
}

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {

    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features)
    final Set<String> docIds = new TreeSet<String>();

    for (Document document : documents) {
        docIds.add(document.getName());/* w  ww  .j  a  v a  2s .  co  m*/
    }

    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));

    // Have to add 1 because SingleIndex.setValue() has a bug, expecting
    // the passed in index to be 1-based rather than 0-based. Why? I have 
    // no idea.
    // Calling path: AddValues.setInputFormat() -->
    //               SingleIndex.setUpper() -->
    //               SingleIndex.setValue()
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));

    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);

    final Instances instances = new Instances(newHeader, documents.size());

    // Map attributes
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();

    final Enumeration<Attribute> en = newHeader.enumerateAttributes();

    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();

        attrMap.put(attr.name(), attr);
    }

    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());

            final Map<String, Object> lineFeatures = line.getFeatures();
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());

            if (line.getLabel() == null) {
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }

            for (Attribute attribute : attrMap.values()) {
                if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) {
                    final String name = attribute.name();
                    final Object obj = lineFeatures.get(name);

                    if (obj instanceof Double) {
                        instance.setValue(attribute, ((Double) obj).doubleValue());
                    } else if (obj instanceof Integer) {
                        instance.setValue(attribute, ((Integer) obj).doubleValue());
                    } else {
                        instance.setValue(attribute, obj.toString());
                    }
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attrMap.size() - 1);

    return instances;
}

From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java

License:Apache License

/**
 * Converts an input space point to a Weka instance.
 * @param point/* w  w  w  . j a v a2  s .c  om*/
 * @return 
 */
public static Instance convertPointToInstance(InputSpacePoint point, OutputSpacePoint outputPoint) {
    Instance inst = new Instance(point.numberDimensions() + outputPoint.numberDimensions());
    int index = 0;
    for (String k : point.getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getValue(k));
    }
    for (Entry<String, Double> e : outputPoint.getOutputPoints().entrySet()) {
        if (e.getValue() == null) {
            inst.setMissing(index++);
        } else {
            Attribute att = new Attribute(e.getKey(), index++);
            inst.setValue(att, e.getValue());
        }
    }

    //assign instance to dataset
    FastVector att = new FastVector(point.numberDimensions() + 1);
    for (String s : point.getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    for (String k : outputPoint.getOutputPoints().keySet()) {
        att.addElement(new Attribute(k, index++));
    }

    Instances dataset = new Instances("instances", att, point.numberDimensions() + 1);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    inst.setDataset(dataset);
    return inst;
}

From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java

License:Apache License

public static Instance convertPointToInstance(InputSpacePoint point) {
    Instance inst = new Instance(point.numberDimensions() + 1);
    int index = 0;
    for (String k : point.getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getValue(k));
    }//from  w w  w .  j a  va  2s .c o m
    inst.setMissing(index);

    //assign instance to dataset
    FastVector att = new FastVector(point.numberDimensions() + 1);
    for (String s : point.getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    att.addElement(new Attribute("objective", index++));

    Instances dataset = new Instances("instances", att, point.numberDimensions() + 1);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    inst.setDataset(dataset);
    return inst;
}

From source file:matres.MatResUI.java

private void doClassification() {
    J48 m_treeResiko;//www .  j a  v a  2  s .  com
    J48 m_treeAksi;
    NaiveBayes m_nbResiko;
    NaiveBayes m_nbAksi;
    FastVector m_fvInstanceRisks;
    FastVector m_fvInstanceActions;

    InputStream isRiskTree = getClass().getResourceAsStream("data/ResikoTree.model");
    InputStream isRiskNB = getClass().getResourceAsStream("data/ResikoNB.model");
    InputStream isActionTree = getClass().getResourceAsStream("data/AksiTree.model");
    InputStream isActionNB = getClass().getResourceAsStream("data/AksiNB.model");

    m_treeResiko = new J48();
    m_treeAksi = new J48();
    m_nbResiko = new NaiveBayes();
    m_nbAksi = new NaiveBayes();
    try {
        //m_treeResiko = (J48) weka.core.SerializationHelper.read("ResikoTree.model");
        m_treeResiko = (J48) weka.core.SerializationHelper.read(isRiskTree);
        //m_nbResiko = (NaiveBayes) weka.core.SerializationHelper.read("ResikoNB.model");
        m_nbResiko = (NaiveBayes) weka.core.SerializationHelper.read(isRiskNB);
        //m_treeAksi = (J48) weka.core.SerializationHelper.read("AksiTree.model");
        m_treeAksi = (J48) weka.core.SerializationHelper.read(isActionTree);
        //m_nbAksi = (NaiveBayes) weka.core.SerializationHelper.read("AksiNB.model");
        m_nbAksi = (NaiveBayes) weka.core.SerializationHelper.read(isActionNB);
    } catch (Exception ex) {
        Logger.getLogger(MatResUI.class.getName()).log(Level.SEVERE, null, ex);
    }

    System.out.println("Setting up an Instance...");
    // Values for LIKELIHOOD OF OCCURRENCE
    FastVector fvLO = new FastVector(5);
    fvLO.addElement("> 10 in 1 year");
    fvLO.addElement("1 - 10 in 1 year");
    fvLO.addElement("1 in 1 year to 1 in 10 years");
    fvLO.addElement("1 in 10 years to 1 in 100 years");
    fvLO.addElement("1 in more than 100 years");
    // Values for SAFETY
    FastVector fvSafety = new FastVector(5);
    fvSafety.addElement("near miss");
    fvSafety.addElement("first aid injury, medical aid injury");
    fvSafety.addElement("lost time injury / temporary disability");
    fvSafety.addElement("permanent disability");
    fvSafety.addElement("fatality");
    // Values for EXTRA FUEL COST
    FastVector fvEFC = new FastVector(5);
    fvEFC.addElement("< 100 million rupiah");
    fvEFC.addElement("0,1 - 1 billion rupiah");
    fvEFC.addElement("1 - 10 billion rupiah");
    fvEFC.addElement("10 - 100  billion rupiah");
    fvEFC.addElement("> 100 billion rupiah");
    // Values for SYSTEM RELIABILITY
    FastVector fvSR = new FastVector(5);
    fvSR.addElement("< 100 MWh");
    fvSR.addElement("0,1 - 1 GWh");
    fvSR.addElement("1 - 10 GWh");
    fvSR.addElement("10 - 100 GWh");
    fvSR.addElement("> 100 GWh");
    // Values for EQUIPMENT COST
    FastVector fvEC = new FastVector(5);
    fvEC.addElement("< 50 million rupiah");
    fvEC.addElement("50 - 500 million rupiah");
    fvEC.addElement("0,5 - 5 billion rupiah");
    fvEC.addElement("5 -50 billion rupiah");
    fvEC.addElement("> 50 billion rupiah");
    // Values for CUSTOMER SATISFACTION SOCIAL FACTOR
    FastVector fvCSSF = new FastVector(5);
    fvCSSF.addElement("Complaint from the VIP customer");
    fvCSSF.addElement("Complaint from industrial customer");
    fvCSSF.addElement("Complaint from community");
    fvCSSF.addElement("Complaint from community that have potential riot");
    fvCSSF.addElement("High potential riot");
    // Values for RISK
    FastVector fvRisk = new FastVector(4);
    fvRisk.addElement("Low");
    fvRisk.addElement("Moderate");
    fvRisk.addElement("High");
    fvRisk.addElement("Extreme");
    // Values for ACTION
    FastVector fvAction = new FastVector(3);
    fvAction.addElement("Life Extension Program");
    fvAction.addElement("Repair/Refurbish");
    fvAction.addElement("Replace/Run to Fail + Investment");

    // Defining Attributes, including Class(es) Attributes
    Attribute attrLO = new Attribute("LO", fvLO);
    Attribute attrSafety = new Attribute("Safety", fvSafety);
    Attribute attrEFC = new Attribute("EFC", fvEFC);
    Attribute attrSR = new Attribute("SR", fvSR);
    Attribute attrEC = new Attribute("EC", fvEC);
    Attribute attrCSSF = new Attribute("CSSF", fvCSSF);
    Attribute attrRisk = new Attribute("Risk", fvRisk);
    Attribute attrAction = new Attribute("Action", fvAction);

    m_fvInstanceRisks = new FastVector(7);
    m_fvInstanceRisks.addElement(attrLO);
    m_fvInstanceRisks.addElement(attrSafety);
    m_fvInstanceRisks.addElement(attrEFC);
    m_fvInstanceRisks.addElement(attrSR);
    m_fvInstanceRisks.addElement(attrEC);
    m_fvInstanceRisks.addElement(attrCSSF);
    m_fvInstanceRisks.addElement(attrRisk);

    m_fvInstanceActions = new FastVector(7);
    m_fvInstanceActions.addElement(attrLO);
    m_fvInstanceActions.addElement(attrSafety);
    m_fvInstanceActions.addElement(attrEFC);
    m_fvInstanceActions.addElement(attrSR);
    m_fvInstanceActions.addElement(attrEC);
    m_fvInstanceActions.addElement(attrCSSF);
    m_fvInstanceActions.addElement(attrAction);

    Instances dataRisk = new Instances("A-Risk-instance-to-classify", m_fvInstanceRisks, 0);
    Instances dataAction = new Instances("An-Action-instance-to-classify", m_fvInstanceActions, 0);
    double[] riskValues = new double[dataRisk.numAttributes()];
    double[] actionValues = new double[dataRisk.numAttributes()];

    String strLO = (String) m_cmbLO.getSelectedItem();
    String strSafety = (String) m_cmbSafety.getSelectedItem();
    String strEFC = (String) m_cmbEFC.getSelectedItem();
    String strSR = (String) m_cmbSR.getSelectedItem();
    String strEC = (String) m_cmbEC.getSelectedItem();
    String strCSSF = (String) m_cmbCSSF.getSelectedItem();

    Instance instRisk = new DenseInstance(7);
    Instance instAction = new DenseInstance(7);

    if (strLO.equals("-- none --")) {
        instRisk.setMissing(0);
        instAction.setMissing(0);
    } else {
        instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(0), strLO);
        instAction.setValue((Attribute) m_fvInstanceActions.elementAt(0), strLO);
    }
    if (strSafety.equals("-- none --")) {
        instRisk.setMissing(1);
        instAction.setMissing(1);
    } else {
        instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(1), strSafety);
        instAction.setValue((Attribute) m_fvInstanceActions.elementAt(1), strSafety);
    }
    if (strEFC.equals("-- none --")) {
        instRisk.setMissing(2);
        instAction.setMissing(2);
    } else {
        instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(2), strEFC);
        instAction.setValue((Attribute) m_fvInstanceActions.elementAt(2), strEFC);
    }
    if (strSR.equals("-- none --")) {
        instRisk.setMissing(3);
        instAction.setMissing(3);
    } else {
        instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(3), strSR);
        instAction.setValue((Attribute) m_fvInstanceActions.elementAt(3), strSR);
    }
    if (strEC.equals("-- none --")) {
        instRisk.setMissing(4);
        instAction.setMissing(4);
    } else {
        instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(4), strEC);
        instAction.setValue((Attribute) m_fvInstanceActions.elementAt(4), strEC);
    }
    if (strCSSF.equals("-- none --")) {
        instRisk.setMissing(5);
        instAction.setMissing(5);
    } else {
        instAction.setValue((Attribute) m_fvInstanceActions.elementAt(5), strCSSF);
        instRisk.setValue((Attribute) m_fvInstanceRisks.elementAt(5), strCSSF);
    }
    instRisk.setMissing(6);
    instAction.setMissing(6);

    dataRisk.add(instRisk);
    instRisk.setDataset(dataRisk);
    dataRisk.setClassIndex(dataRisk.numAttributes() - 1);

    dataAction.add(instAction);
    instAction.setDataset(dataAction);
    dataAction.setClassIndex(dataAction.numAttributes() - 1);

    System.out.println("Instance Resiko: " + dataRisk.instance(0));
    System.out.println("\tNum Attributes : " + dataRisk.numAttributes());
    System.out.println("\tNum instances  : " + dataRisk.numInstances());
    System.out.println("Instance Action: " + dataAction.instance(0));
    System.out.println("\tNum Attributes : " + dataAction.numAttributes());
    System.out.println("\tNum instances  : " + dataAction.numInstances());

    int classIndexRisk = 0;
    int classIndexAction = 0;
    String strClassRisk = null;
    String strClassAction = null;

    try {
        //classIndexRisk = (int) m_treeResiko.classifyInstance(dataRisk.instance(0));
        classIndexRisk = (int) m_treeResiko.classifyInstance(instRisk);
        classIndexAction = (int) m_treeAksi.classifyInstance(instAction);
    } catch (Exception ex) {
        Logger.getLogger(MatResUI.class.getName()).log(Level.SEVERE, null, ex);
    }

    strClassRisk = (String) fvRisk.elementAt(classIndexRisk);
    strClassAction = (String) fvAction.elementAt(classIndexAction);
    System.out.println("[Risk  Class Index: " + classIndexRisk + " Class Label: " + strClassRisk + "]");
    System.out.println("[Action  Class Index: " + classIndexAction + " Class Label: " + strClassAction + "]");
    if (strClassRisk != null) {
        m_txtRisk.setText(strClassRisk);
    }

    double[] riskDist = null;
    double[] actionDist = null;
    try {
        riskDist = m_nbResiko.distributionForInstance(dataRisk.instance(0));
        actionDist = m_nbAksi.distributionForInstance(dataAction.instance(0));
        String strProb;
        // set up RISK progress bars
        m_jBarRiskLow.setValue((int) (100 * riskDist[0]));
        m_jBarRiskLow.setString(String.format("%6.3f%%", 100 * riskDist[0]));
        m_jBarRiskModerate.setValue((int) (100 * riskDist[1]));
        m_jBarRiskModerate.setString(String.format("%6.3f%%", 100 * riskDist[1]));
        m_jBarRiskHigh.setValue((int) (100 * riskDist[2]));
        m_jBarRiskHigh.setString(String.format("%6.3f%%", 100 * riskDist[2]));
        m_jBarRiskExtreme.setValue((int) (100 * riskDist[3]));
        m_jBarRiskExtreme.setString(String.format("%6.3f%%", 100 * riskDist[3]));
    } catch (Exception ex) {
        Logger.getLogger(MatResUI.class.getName()).log(Level.SEVERE, null, ex);
    }

    double predictedProb = 0.0;
    String predictedClass = "";

    // Loop over all the prediction labels in the distribution.
    for (int predictionDistributionIndex = 0; predictionDistributionIndex < riskDist.length; predictionDistributionIndex++) {
        // Get this distribution index's class label.
        String predictionDistributionIndexAsClassLabel = dataRisk.classAttribute()
                .value(predictionDistributionIndex);
        int classIndex = dataRisk.classAttribute().indexOfValue(predictionDistributionIndexAsClassLabel);
        // Get the probability.
        double predictionProbability = riskDist[predictionDistributionIndex];

        if (predictionProbability > predictedProb) {
            predictedProb = predictionProbability;
            predictedClass = predictionDistributionIndexAsClassLabel;
        }

        System.out.printf("[%2d %10s : %6.3f]", classIndex, predictionDistributionIndexAsClassLabel,
                predictionProbability);
    }
    m_txtRiskNB.setText(predictedClass);
}