Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

This page collects example usages of weka.core Instances add.

Prototype

@Override
public boolean add(Instance instance) 

Document

Adds one instance to the end of the set.
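
As a minimal, self-contained sketch of the call (assuming the Weka 3.7+ API, where DenseInstance is the standard Instance implementation; the dataset and attribute names below are made up for illustration):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) {
        // Hypothetical header: one numeric attribute and a nominal class.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("no");
        labels.add("yes");
        atts.add(new Attribute("class", labels));

        Instances data = new Instances("demo", atts, 0);
        data.setClassIndex(data.numAttributes() - 1);

        // Build one instance against the dataset header and append it.
        Instance inst = new DenseInstance(data.numAttributes());
        inst.setDataset(data);
        inst.setValue(0, 4.2);
        inst.setValue(1, "yes");
        data.add(inst); // appends a copy of inst and returns true

        System.out.println(data.numInstances()); // prints 1
    }
}

Note that add copies the instance before appending it, so later changes to inst do not affect the dataset.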

Usage

From source file:elh.eus.absa.Features.java

License:Open Source License

/**
 * Fills the attribute vectors for the instances in the given CoNLL tabulated-format corpus.
 * Attribute vectors contain the features loaded by the creatFeatureSet() function.
 *
 * @param save whether the Instances should be saved to an ARFF file or not.
 * @param prefix prefix used for the name of the saved file.
 * @return Weka Instances object containing the attribute vectors filled with the features specified
 *          in the parameter file.
 */
public Instances loadInstancesConll(boolean save, String prefix) {
    String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_"
            + prefix;
    HashMap<String, Opinion> trainExamples = corpus.getOpinions();

    String nafdir = params.getProperty("kafDir");
    int trainExamplesNum = trainExamples.size();

    int bowWin = 0;
    if (params.containsKey("window")) {
        bowWin = Integer.parseInt(params.getProperty("window"));
        savePath = savePath + "_w" + bowWin;
    }

    //System.out.println("train examples: "+trainExamplesNum);
    //Create the Weka object for the training set
    Instances rsltdata = new Instances("train", atts, trainExamplesNum);

    // setting the class attribute (the last attribute in the train data)
    //traindata.setClassIndex(traindata.numAttributes() - 1);

    System.err.println("Features: loadInstancesConll() - featNum: " + this.featNum
            + " - trainset attrib num -> " + rsltdata.numAttributes() + " - ");
    System.out.println("Features: loadInstancesConll() - featNum: " + this.featNum
            + " - trainset attrib num -> " + rsltdata.numAttributes() + " - ");

    int instId = 1;
    // fill the vectors for each training example
    for (String oId : trainExamples.keySet()) {
        //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId()));

        //value vector
        double[] values = new double[featNum];

        // first element is the instanceId         
        values[rsltdata.attribute("instanceId").index()] = instId;

        LinkedList<String> ngrams = new LinkedList<String>();
        int ngramDim;
        try {
            ngramDim = Integer.valueOf(params.getProperty("wfngrams"));
        } catch (Exception e) {
            ngramDim = 0;
        }

        boolean polNgrams = false;
        if (params.containsKey("polNgrams")) {
            polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes");
        }

        String nafPath = nafdir + File.separator + trainExamples.get(oId).getsId().replace(':', '_');
        String taggedFile = "";
        try {
            if (!FileUtilsElh.checkFile(nafPath + ".kaf")) {
                nafPath = NLPpipelineWrapper.tagSentence(corpus.getOpinionSentence(oId), nafPath,
                        corpus.getLang(), params.getProperty("pos-model"), params.getProperty("lemma-model"),
                        postagger);
            } else {
                nafPath = nafPath + ".kaf";
            }
            InputStream reader = new FileInputStream(new File(nafPath));
            taggedFile = IOUtils.toString(reader);
            reader.close();
        } catch (IOException | JDOMException fe) {
            // TODO Auto-generated catch block
            fe.printStackTrace();
        }

        String[] noWindow = taggedFile.split("\n");

        //counter for opinion sentence token number. Used for computing relative values of the features
        int tokNum = noWindow.length;

        //System.err.println("Features::loadInstancesConll - tagged File read lines:"+tokNum);

        List<String> window = Arrays.asList(noWindow);
        Integer end = corpus.getOpinion(oId).getTo();
        // apply the window if it is active (bowWin > 0) and the target is not null (to == 0 marks a null target)
        if ((bowWin > 0) && (end > 0)) {
            Integer start = corpus.getOpinion(oId).getFrom();
            Integer from = start - bowWin;
            if (from < 0) {
                from = 0;
            }
            Integer to = end + bowWin;
            if (to > noWindow.length - 1) {
                to = noWindow.length - 1;
            }
            window = Arrays.asList(Arrays.copyOfRange(noWindow, from, to));
        }

        //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+
        //      "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n");

        //System.err.println(Arrays.toString(window.toArray()));

        // word form ngram related features
        for (String wf : window) {
            String[] fields = wf.split("\\s");
            String wfStr = normalize(fields[0], params.getProperty("normalization", "none"));
            // a blank line marks a sentence end: flush the n-gram list and reinitialize it.
            if (wf.equals("")) {
                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, true); //toknum

                // since wf is empty no need to check for clusters and other features.
                continue;
            }

            if (params.containsKey("wfngrams") && ngramDim > 0) {
                if (!savePath.contains("_wf" + ngramDim)) {
                    savePath = savePath + "_wf" + ngramDim;
                }
                //if the current word form is in the ngram list activate the feature in the vector
                if (ngrams.size() >= ngramDim) {
                    ngrams.removeFirst();
                }
                ngrams.add(wfStr);

                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, false); //toknum
            }
            // Clark cluster info corresponding to the current word form
            if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) {
                if (!savePath.contains("_cl")) {
                    savePath = savePath + "_cl";
                }
                values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++;
            }

            // Brown cluster info corresponding to the current word form
            if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) {
                if (!savePath.contains("_br")) {
                    savePath = savePath + "_br";
                }
                values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++;
            }

            // word2vec cluster info corresponding to the current word form
            if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) {
                if (!savePath.contains("_w2v")) {
                    savePath = savePath + "_w2v";
                }
                values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++;
            }

        }

        //empty ngram list and add remaining ngrams to the feature list
        checkNgramFeatures(ngrams, values, "", 1, true); //toknum

        // PoS tagger related attributes: lemmas and pos tags
        if (params.containsKey("lemmaNgrams")
                || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0"))
                || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) {
            ngrams = new LinkedList<String>();
            if (params.containsKey("lemmaNgrams")
                    && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) {
                ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams"));
            } else {
                ngramDim = 3;
            }
            LinkedList<String> posNgrams = new LinkedList<String>();
            int posNgramDim = 0;
            if (params.containsKey("pos")) {
                posNgramDim = Integer.valueOf(params.getProperty("pos"));
            }

            for (String t : window) {
                //lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))
                if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral")
                        || params.containsKey("polarLexiconDomain")) {
                    if (!savePath.contains("_l" + ngramDim)) {
                        savePath = savePath + "_l" + ngramDim;
                    }

                    //a blank line marks a sentence end: flush the n-gram list and reinitialize it.
                    if (t.equals("")) {
                        // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                        checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, true, polNgrams); //toknum

                        // since t is empty no need to check for clusters and other features.
                        continue;
                    }

                    String[] fields = t.split("\\s");
                    if (fields.length < 2) {
                        continue;
                    }
                    String lemma = normalize(fields[1], params.getProperty("normalization", "none"));

                    if (ngrams.size() >= ngramDim) {
                        ngrams.removeFirst();
                    }
                    ngrams.add(lemma);

                    // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                    checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, false, polNgrams);

                }

                //pos tags
                if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) {
                    if (!savePath.contains("_p")) {
                        savePath = savePath + "_p";
                    }

                    if (posNgrams.size() >= posNgramDim) {
                        posNgrams.removeFirst();
                    }

                    String[] fields = t.split("\\s");
                    if (fields.length < 3) {
                        continue;
                    }
                    String pos = fields[2];

                    posNgrams.add(pos);

                    // add ngrams to the feature vector
                    checkNgramFeatures(posNgrams, values, "pos", 1, false);
                }
            } //endFor

            //empty ngram list and add remaining ngrams to the feature list
            // check both lemma n-grams and polarity lexicons, and add values to the feature vector
            checkNgramsAndPolarLexicons(ngrams, values, "", 1, tokNum, true, polNgrams);

            //empty pos ngram list and add remaining pos ngrams to the feature list
            checkNgramFeatures(posNgrams, values, "pos", 1, true);

        }

        // add sentence length as a feature
        if (params.containsKey("sentenceLength")
                && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) {
            values[rsltdata.attribute("sentenceLength").index()] = tokNum;
        }

        // compute uppercase ratio before normalization (if needed)      
        //double upRatio =0.0;
        //if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes"))
        //{
        //   String upper = opNormalized.replaceAll("[a-z]", "");
        //   upRatio = (double)upper.length() / (double)opNormalized.length();
        //   values[rsltdata.attribute("upperCaseRation").index()] = upRatio;
        //}

        //create object for the current instance and associate it with the current train dataset.         
        Instance inst = new SparseInstance(1.0, values);
        inst.setDataset(rsltdata);

        // add category attribute values
        String cat = trainExamples.get(oId).getCategory();

        if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) {
            if (cat.compareTo("NULL") == 0) {
                inst.setValue(rsltdata.attribute("entCat").index(), cat);
                inst.setValue(rsltdata.attribute("attCat").index(), cat);
            } else {
                String[] splitCat = cat.split("#");
                inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]);
                inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]);
            }

            //inst.setValue(attIndexes.get("entAttCat"), cat);
        } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) {
            inst.setValue(rsltdata.attribute("entAttCat").index(), cat);
        }

        if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) {
            // add class value as a double (Weka stores all values as doubles)
            String pol = normalizePolarity(trainExamples.get(oId).getPolarity());
            if (pol != null && !pol.isEmpty()) {
                inst.setValue(rsltdata.attribute("polarityCat"), pol);
            } else {
                //System.err.println("polarity: _"+pol+"_");
                inst.setMissing(rsltdata.attribute("polarityCat"));
            }
        }

        //add instance to train data
        rsltdata.add(inst);

        //store opinion Id and instance Id
        this.opInst.put(oId, instId);
        instId++;
    }

    System.err.println("Features : loadInstancesConll() - training data ready total number of examples -> "
            + trainExamplesNum + " - " + rsltdata.numInstances());

    if (save) {
        try {
            savePath = savePath + ".arff";
            System.err.println("arff written to: " + savePath);
            ArffSaver saver = new ArffSaver();

            saver.setInstances(rsltdata);

            saver.setFile(new File(savePath));
            saver.writeBatch();
        } catch (IOException e1) {
            e1.printStackTrace();
        } catch (Exception e2) {
            e2.printStackTrace();
        }
    }

    return rsltdata;
}
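
Two details in this example are easy to miss: a SparseInstance is used because the n-gram feature vectors are mostly zeros, and inst.setDataset(rsltdata) has to happen before nominal values such as the category and polarity are set, since setValue(int, String) resolves attribute indices through the instance's dataset. Note also that rsltdata.add(inst) stores a copy of the instance, so the dataset is not affected by anything done to inst afterwards.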

From source file:entities.WekaBaselineBOWFeatureVector.java

public Instances fillInstanceSet(ArrayList<BaselineBOWFeatureVector> vList,
        ArrayList<BaselineBOWFeatureVector> vList2) throws IOException {

    ArrayList<Attribute> attributes = initializeWekaFeatureVector();
    Instances isSet = new Instances(vList.get(0).getLabel(), attributes, vList.size());

    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (BaselineBOWFeatureVector BOWv : vList) {

        Instance i = fillFeatureVector(BOWv, isSet);

        isSet.add(i);
    }

    for (BaselineBOWFeatureVector BOWv : vList2) {

        Instance i = fillFeatureVector(BOWv, isSet);

        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/test.arff"));
    saver.writeBatch();

    return isSet;
}

From source file:entities.WekaBOWFeatureVector.java

public Instances fillInstanceSet(ArrayList<BOWFeatureVector> vList, ArrayList<BOWFeatureVector> vList2)
        throws IOException {

    ArrayList<Attribute> attributes = initializeWekaFeatureVector();
    Instances isSet = new Instances(vList.get(0).getLabel(), attributes, vList.size());

    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (BOWFeatureVector BOWv : vList) {

        Instance i = fillFeatureVector(BOWv, isSet);

        isSet.add(i);
    }

    for (BOWFeatureVector BOWv : vList2) {

        Instance i = fillFeatureVector(BOWv, isSet);

        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/test.arff"));
    saver.writeBatch();

    return isSet;
}

From source file:entities.WekaHMMFeatureVector.java

public Instances fillInstanceSet(ArrayList<HMMFeatureVector> vList, ArrayList<HMMFeatureVector> vList2)
        throws IOException {

    //FastVector fvWekaAttributesHmm = new FastVector(3);

    ArrayList<Attribute> attributes = initializeWekaFeatureVector();
    Instances isSet = new Instances("dataset", attributes, vList.size());

    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (HMMFeatureVector HMMv : vList) {

        Instance i = fillFeatureVector(HMMv, isSet);

        isSet.add(i);
    }

    for (HMMFeatureVector HMMv : vList2) {

        Instance i = fillFeatureVector(HMMv, isSet);

        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/test.arff"));
    saver.writeBatch();

    return isSet;
}

From source file:entities.WekaNGGFeatureVector.java

public Instances fillInstanceSet(ArrayList<NGGFeatureVector> vList, ArrayList<NGGFeatureVector> vList2,
        String datasetType) throws IOException {
    ArrayList<Attribute> attributes = initializeWekaFeatureVector();
    Instances isSet = new Instances(vList.get(0).getLabel(), attributes, vList.size());

    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (NGGFeatureVector NGGv : vList) {

        Instance i = fillFeatureVector(NGGv, isSet);

        isSet.add(i);
    }

    for (NGGFeatureVector NGGv : vList2) {

        Instance i = fillFeatureVector(NGGv, isSet);

        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/" + datasetType + ".arff"));
    saver.writeBatch();

    return isSet;
}
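
The four fillInstanceSet variants above differ only in the element type of their vector lists; the set-class-index, add-loop, and ArffSaver steps are identical. As a purely hypothetical refactoring sketch (the VectorMapper interface and helper class below are not part of the quoted sources), the shared pattern could be written once with generics:

import java.util.List;

import weka.core.Instance;
import weka.core.Instances;

public class InstanceSetFiller {

    // Hypothetical adapter: turns one feature vector into a Weka Instance.
    public interface VectorMapper<V> {
        Instance toInstance(V vector, Instances dataset);
    }

    // Shared add-loop extracted from the four fillInstanceSet variants.
    public static <V> Instances fillInstanceSet(List<V> first, List<V> second,
            Instances isSet, VectorMapper<V> mapper) {
        isSet.setClassIndex(isSet.numAttributes() - 1);
        for (V v : first) {
            isSet.add(mapper.toInstance(v, isSet));
        }
        for (V v : second) {
            isSet.add(mapper.toInstance(v, isSet));
        }
        return isSet;
    }
}

Each vector class would then supply a mapper backed by its existing fillFeatureVector method.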

From source file:entity.DifficultyResamplingManager.java

License:Open Source License

/**
 * Called by generateResampledSubdataset.
 *
 * @param originalDataset the dataset to resample
 * @param subdatasetDimensions the target numbers of positive and negative instances
 * @return the resampled subdataset
 */
private Instances generateResampledSubdataset(Instances originalDataset,
        SubdatasetDimensions subdatasetDimensions) {

    // creates an empty dataset
    Instances resampledSubdataset = new Instances(originalDataset);
    resampledSubdataset.delete();

    // randomize dataset instances order
    originalDataset.randomize(RandomizationManager.randomGenerator);

    // calc number of positives to insert
    int positivesToInsert = subdatasetDimensions.getP();
    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = "
                + positivesToInsert);

    // calc number of negatives to insert
    int negativesToInsert = subdatasetDimensions.getN();

    // iterates over the original dataset instances
    for (int i = 0; i < originalDataset.numInstances(); i++) {
        // if instance is positive and more are needed in the new dataset, inserts into new dataset
        if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.buggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            positivesToInsert--;
        }

        // if instance is negative and more are needed in the new dataset, inserts into new dataset
        if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.nonbuggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            negativesToInsert--;
        }

    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling terminato: "
                + this.printDatasetInfo(resampledSubdataset));
    return resampledSubdataset;
}
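
A small aside on the empty-dataset idiom at the top of this method: Weka's Instances copy constructor with a capacity argument copies the header only, so the copy-then-delete pair could be replaced by a single call (an alternative, not what the original source does):

    // Copies attribute information only; starts with zero instances.
    Instances resampledSubdataset = new Instances(originalDataset, 0);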

From source file:ergasia2pkg.LP_ROS.java

/**
 * Creates a new MultiLabelInstances object given a map of Instance lists.
 *
 * @param labelsetGroup HashMap from labelset to the List of Instance objects to add
 * @param mlData MultiLabelInstances used just to get the label metadata
 * @return a new MultiLabelInstances object
 */
private MultiLabelInstances createNewMultilabelInstance(HashMap<String, List<Instance>> labelsetGroup,
        MultiLabelInstances mlData) throws InvalidDataFormatException {

    Instances in = mlData.getDataSet();
    Enumeration enumeration = in.enumerateAttributes();
    ArrayList attlist = Collections.list(enumeration);
    int capacity = 0;
    for (String labelset : labelsetGroup.keySet()) {
        capacity += labelsetGroup.get(labelset).size();
    }

    Instances newInstances = new Instances("sampledDataset", attlist, capacity);
    for (String labelset : labelsetGroup.keySet()) {
        List<Instance> instanceList = (ArrayList<Instance>) labelsetGroup.get(labelset);
        for (Instance inst : instanceList) {
            newInstances.add(inst);
        }
    }

    MultiLabelInstances newData = new MultiLabelInstances(newInstances, mlData.getLabelsMetaData());
    return newData;
}
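
A minor point on the raw types above: in Weka 3.7+, enumerateAttributes() returns a typed Enumeration<Attribute> (note that it skips the class attribute if one is set), so the attribute copy could be written without unchecked warnings:

    // Typed equivalent of the attribute copy above (assumes Weka 3.7+).
    Enumeration<Attribute> enumeration = in.enumerateAttributes();
    ArrayList<Attribute> attList = Collections.list(enumeration);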

From source file:ergasia2pkg.ML_RUS.java

/**
 * Method to perform undersampling on the initial dataset. The method
 * removes instances from the dataset according to the algorithm proposed in
 * the paper, utilising the Mean Imbalance Ratio measure.
 *
 * @param mlData MultiLabelInstances object holding a set of multilabel
 * instances
 * @return MultiLabelInstances object containing the initial instances minus
 * those removed by undersampling
 * @throws Exception
 */
@Override
public MultiLabelInstances transformInstances(MultiLabelInstances mlData) throws Exception {
    //Initialise the label counters
    labelCount(mlData);
    //Clone the dataset into a new object
    MultiLabelInstances mlDataClone = mlData.clone();
    //Clone a new set to contain all the instances that will be returned
    Instances mlDataReturned = mlData.clone().getDataSet();
    mlDataReturned.delete();

    //Calculate the number of samples to remove
    int samplesToDelete = (int) (mlData.getNumInstances() / (100 * P));
    int remainingLabels;
    //Declare two lists of lists, a minorityBag and a majorityBag. The minBag 
    //will contain lists (bags) of instances having labels with 
    //an imbalance ratio higher than the mean imbalance ratio. These will be 
    //set aside and not tampered with in any way. The majBag will also contain 
    //lists of instances having labels with an imbalance ratio lower than or 
    //equal to the mean imbalance ratio. These instances will be the candidates 
    //for deletion.
    List<List<Instance>> minBags = new ArrayList<>();
    List<List<Instance>> majBags = new ArrayList<>();
    //Get an array with the indices of all the labels
    int L[] = mlDataClone.getLabelIndices();
    //Calculate the dataset's mean imbalance ratio
    double meanIR = meanImbalanceRatio(mlDataClone);
    String labelName;
    int i = 0, m = 0, x, labelCounter = 0;
    //Declare a boolean array which will follow the labelset L, and determine 
    //whether or not a label's instances should be considered for undersampling
    //Initialise all its values to true.
    boolean included[] = new boolean[L.length];
    for (int k = 0; k < L.length; k++) {
        included[k] = true;
    }
    Random rand = new Random();
    //Perform the following operation for each label
    //Note that labels are represented by their integer index, which is then
    //transformed to its string name. This was done to avoid problems and 
    //exceptions thrown by methods required below
    for (int label : L) {
        //Get the label name from the current instance, based on label index
        labelName = mlDataClone.getDataSet().attribute(label).name();
        if (imbalanceRatioPerLabel(mlDataClone, labelName) > meanIR) {
            //if the imbalance ratio of the label is greater than the mean 
            //imbalance ratio of the dataset, add it to the minbag corresponding 
            //to the specific label. 
            minBags.add(new ArrayList<Instance>());
            //Add all instances containing this label to the minbag we just 
            //created
            for (int l = 0; l < mlDataClone.getNumInstances(); l++) {
                if (mlDataClone.getDataSet().get(l).value(label) == 1.0) {
                    minBags.get(i).add(mlDataClone.getDataSet().get(l));
                    //Remove the instance from the dataset and step back one
                    //position, so the instance shifted into index l is not skipped
                    mlDataClone.getDataSet().delete(l);
                    l--;
                }
            }
            //Set the included flag as false, so that the label is not added
            //to the majbags
            included[labelCounter] = false;
            i++;
        }
        labelCounter++;
    }
    //For every label again
    for (int label : L) {
        //Add a new majbag (one for each label)
        majBags.add(new ArrayList<Instance>());
        //Add all the instances having this label to the majbag. Note that 
        //this operation takes place on the cloned dataset, which now contains
        //only the instances not having minority labels
        for (int l = 0; l < mlDataClone.getNumInstances(); l++) {
            if (mlDataClone.getDataSet().get(l).value(label) == 1.0) {
                majBags.get(m).add(mlDataClone.getDataSet().get(l));
            }
        }
        m++;
    }
    remainingLabels = L.length - minBags.size();

    //While we haven't deleted all the samples yet and we still have labels 
    //to delete
    while (samplesToDelete > 0 && remainingLabels > 0) {
        //For each of the INITIAL labels (not only the ones in the cloned dataset)
        for (int j = 0; j < mlData.getNumLabels(); j++) {
            if (included[j]) {
                //if it is to be included (meaning it is a majority label), check 
                //if this bag contains instances. If it doesn't, decrease the 
                //numbers and go to the next iteration
                if (majBags.get(j).size() == 0) {
                    included[j] = false;
                    remainingLabels--;
                    continue;
                }
                //Get a random instance from the bag
                x = rand.nextInt(majBags.get(j).size());
                //Based on the instance and the index, get its label
                labelName = majBags.get(j).get(x).attribute(L[j]).name();
                //Remove the instance from the bag
                majBags.get(j).remove(x);
                //If the imbalance ratio of the label has increased beyond the 
                //acceptable limit of the mean imbalance ratio, remove this 
                //majbag from future candidates
                if (imbalanceRatioPerLabel(mlDataClone, labelName) >= meanIR) {
                    included[j] = false;
                    remainingLabels--;
                }
                samplesToDelete--;
            }
        }
    }
    //Add the contents of the minbags and the majbags to an empty dataset 
    //and return it
    for (List<Instance> list : minBags) {
        for (Instance inst : list) {
            mlDataReturned.add(inst);
        }
    }
    for (List<Instance> list : majBags) {
        for (Instance inst : list) {
            mlDataReturned.add(inst);
        }
    }

    return new MultiLabelInstances(mlDataReturned, mlData.getLabelsMetaData());
}

From source file:EsperTest.CEPListener.java

public void update(EventBean[] newData, EventBean[] oldData) {

    System.out.println("Event received: " + newData[0].getUnderlying());
    if (newData.length > 2) {
        //create the column name and type, these are strings
        //http://weka.wikispaces.com/Creating+an+ARFF+file
        Instances data;
        FastVector atts = new FastVector();

        for (int j = 0; j < columnNumbers.length; j++) {
            FastVector values = new FastVector();
            for (int i = 0; i < labels.NominalCount(j); i++) {
                values.addElement(labels.GetLabel(columnNumbers[j], i));
            }
            atts.addElement(new Attribute(labels.GetHeader(columnNumbers[j]), values));
        }

        data = new Instances("Title", atts, 0);

        for (int i = 0; i < newData.length; i++) {
            Instance inst = new Instance(columnNumbers.length);
            for (int j = 0; j < columnNumbers.length; j++) {
                inst.setValue(j, newData[i].get("eventType").toString());
            }
            data.add(inst);
        }

        Apriori aprioriObj = new weka.associations.Apriori();

        try {
            aprioriObj.buildAssociations(data);
        } catch (Exception e) {
            System.out.println(e);
        }

        FastVector rules[] = aprioriObj.getAllTheRules();

    }

}
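
A note on API versions: this listener is written against the pre-3.7 Weka API, where weka.core.Instance was a concrete class and FastVector was the standard attribute container. In Weka 3.7 and later, Instance is an interface, so new Instance(n) becomes new DenseInstance(n) and FastVector gives way to ArrayList<Attribute>, as in the other examples on this page.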

From source file:etc.aloe.data.SegmentSet.java

License:Open Source License

/**
 * Convert the segment set into an ExampleSet (ready for feature
 * extraction). The returned example set includes an id attribute, the
 * message text, a label attribute, and several basic features extracted
 * from the segment.
 *
 * @return the basic example set
 */
public ExampleSet getBasicExamples() {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();

    attributes.add(new Attribute(ExampleSet.ID_ATTR_NAME));
    attributes.add(new Attribute(ExampleSet.MESSAGE_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(ExampleSet.LABEL_ATTR_NAME, Arrays.asList(new String[] { "false", "true" })));
    attributes.add(new Attribute(ExampleSet.PARTICIPANT_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(DURATION_ATTR_NAME));
    attributes.add(new Attribute(LENGTH_ATTR_NAME));
    attributes.add(new Attribute(CPS_ATTR_NAME));
    attributes.add(new Attribute(RATE_ATTR_NAME));

    Instances instances = new Instances("BasicExamples", attributes, 0);
    instances.setClassIndex(2);

    Attribute idAttr = instances.attribute(ExampleSet.ID_ATTR_NAME);
    Attribute messageAttr = instances.attribute(ExampleSet.MESSAGE_ATTR_NAME);
    Attribute labelAttr = instances.attribute(ExampleSet.LABEL_ATTR_NAME);
    Attribute participantAttr = instances.attribute(ExampleSet.PARTICIPANT_ATTR_NAME);
    Attribute durationAttr = instances.attribute(DURATION_ATTR_NAME);
    Attribute lengthAttr = instances.attribute(LENGTH_ATTR_NAME);
    Attribute cpsAttr = instances.attribute(CPS_ATTR_NAME);
    Attribute rateAttr = instances.attribute(RATE_ATTR_NAME);

    for (int i = 0; i < size(); i++) {
        Segment segment = get(i);
        Instance instance = new DenseInstance(instances.numAttributes());

        String messageStr = segment.concatMessages();
        String participantStr = segment.concatParticipants();

        instance.setValue(idAttr, segment.getId());
        instance.setValue(messageAttr, messageStr);
        instance.setValue(participantAttr, participantStr);

        if (segment.hasTrueLabel()) {
            instance.setValue(labelAttr, segment.getTrueLabel() ? "true" : "false");
        }

        computeRateValues(segment, instance, messageStr, durationAttr, lengthAttr, cpsAttr, rateAttr);

        instances.add(instance);
    }

    return new ExampleSet(instances);
}