Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

On this page you can find example usage for weka.core Instances add.

Prototype

@Override
public boolean add(Instance instance) 

Document

Adds one instance to the end of the set.
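
Before the examples, here is a minimal self-contained sketch of the call (not taken from any of the sources below; the class and attribute names are made up for illustration). Note that add() appends a shallow copy of the instance rather than the instance itself, and that the boolean return in the prototype comes from newer Weka releases, where Instances implements java.util.List<Instance>; in older 3.x releases the method returned void.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) {
        // Two numeric attributes; the names are arbitrary.
        ArrayList<Attribute> attInfo = new ArrayList<>();
        attInfo.add(new Attribute("x"));
        attInfo.add(new Attribute("y"));

        // An empty dataset with an initial capacity of 0.
        Instances data = new Instances("demo", attInfo, 0);

        // add() appends a copy of the instance to the end of the set.
        Instance inst = new DenseInstance(1.0, new double[] { 1.5, 2.5 });
        data.add(inst);

        System.out.println(data.numInstances()); // prints 1
    }
}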

Usage

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.ClusteringTask.java

License:Open Source License

/**
 * Creates the weka data set for clustering of variables (metabolites)
 *
 * @param rawData
 *            Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
    FastVector attributes = new FastVector();

    for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
        String varName = "Var" + i;
        Attribute var = new Attribute(varName);
        attributes.addElement(var);
    }

    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
        Attribute name = new Attribute("name", (FastVector) null);
        attributes.addElement(name);
    }
    Instances data = new Instances("Dataset", attributes, 0);

    for (int i = 0; i < selectedRows.length; i++) {
        double[] values = new double[data.numAttributes()];
        System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);

        if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
            DecimalFormat twoDForm = new DecimalFormat("#.##");
            double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
            double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
            String rowName = "MZ->" + MZ + "/RT->" + RT;
            values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
        }
        Instance inst = new SparseInstance(1.0, values);
        data.add(inst);
    }
    return data;
}

From source file:netkit.classifiers.nonrelational.LocalWeka.java

License:Apache License

/**
 * Induce the weka classifier by creating a training Instances object according
 * to the schema of the nodes to be classified.
 *
 * @param graph Graph whose nodes are to be estimated
 * @param split The split between training and test.  Used to get the nodetype and class attribute.
 */
public void induceModel(Graph graph, DataSplit split) {
    super.induceModel(graph, split);

    Node[] trainingSet = split.getTrainSet();

    // return if no training is to be done.
    if (trainingSet == null || trainingSet.length == 0)
        return;

    // Create a FastVector of the possible values of the class attribute
    FastVector clsValues = new FastVector(attribute.size());
    for (String token : attribute.getTokens())
        clsValues.addElement(token);

    // Create the array that defines the attributes.  We do not include the 'key' attribute
    Attributes attribs = trainingSet[0].getAttributes();
    FastVector attInfo = new FastVector(attribs.attributeCount() - 1);
    for (Attribute attrib : attribs) {
        // do not include the KEY attribute
        if (attrib == attribs.getKey())
            continue;

        if (attrib.getType() == Type.CATEGORICAL) {
            String[] tokens = ((AttributeCategorical) attrib).getTokens();
            FastVector values = new FastVector(tokens.length);
            for (String token : tokens)
                values.addElement(token);
            attInfo.addElement(new weka.core.Attribute(attrib.getName(), values));
        } else
            attInfo.addElement(new weka.core.Attribute(attrib.getName()));
    }

    // Create the training Instances object + set the class attribute index
    Instances train = new Instances("train", attInfo, split.getTrainSetSize());
    train.setClassIndex(vectorClsIdx);

    // Create the training instance objects
    for (Node node : split.getTrainSet()) {
        double[] v = new double[attInfo.size()];
        makeVector(node, v);
        train.add(new Instance(1, v));
    }

    // Finally induce the weka classifier
    try {
        classifier.buildClassifier(train);
    } catch (Exception e) {
        throw new RuntimeException("Failed to build classifier " + classifier.getClass().getName(), e);
    }

    // Now set up the test environment.  It is a test Instances object containing
    // only a single test instance.  We also keep a reference to the double array
    // that represents the attribute values.
    cVector = new double[attInfo.size()];
    testInstance = new Instance(1, cVector);
    testInstances = new Instances("test", attInfo, 1);
    testInstances.setClassIndex(vectorClsIdx);
    testInstances.add(testInstance);
    testInstance = testInstances.firstInstance();
}
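
A detail that is easy to miss in this example: because add() stores a shallow copy, the testInstance passed to add() is not the object the dataset holds, so the code re-fetches it with firstInstance(). The shallow copy still shares the cVector array, which is why writing into that array later updates the stored test instance. A self-contained sketch of the idiom, assuming the pre-3.7 Weka API used above, where weka.core.Instance is a concrete class:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class ReuseTestInstance {
    public static void main(String[] args) {
        FastVector attInfo = new FastVector(1);
        attInfo.addElement(new Attribute("x"));

        double[] vec = new double[1];
        Instances test = new Instances("test", attInfo, 1);
        test.add(new Instance(1, vec));          // stores a shallow copy
        Instance stored = test.firstInstance();  // re-fetch the dataset's copy

        vec[0] = 42.0;                           // the copy shares vec...
        System.out.println(stored.value(0));     // ...so this prints 42.0
    }
}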

From source file:netkit.classifiers.relational.NetworkWeka.java

License:Apache License

/**
 * Induce the weka classifier by creating a training Instances object according
 * to the schema of the nodes to be classified.
 *
 * @param graph Graph whose nodes are to be estimated
 * @param split The split between training and test.  Used to get the nodetype and class attribute.
 */
public void induceModel(Graph graph, DataSplit split) {
    super.induceModel(graph, split);
    Node[] trainingSet = split.getTrainSet();
    if (trainingSet == null || trainingSet.length == 0)
        return;

    Attributes attribs = trainingSet[0].getAttributes();
    FastVector attInfo = new FastVector(tmpVector.length);
    logger.finer("Setting up WEKA attributes");
    if (useIntrinsic) {
        for (Attribute attrib : attribs) {
            // do not include the KEY attribute
            if (attrib == attribs.getKey())
                continue;

            switch (attrib.getType()) {
            case CATEGORICAL:
                String[] tokens = ((AttributeCategorical) attrib).getTokens();
                FastVector values = new FastVector(tokens.length);
                for (String token : tokens)
                    values.addElement(token);
                attInfo.addElement(new weka.core.Attribute(attrib.getName(), values));
                logger.finer("Adding WEKA attribute " + attrib.getName() + ":Categorical");
                break;

            default:
                attInfo.addElement(new weka.core.Attribute(attrib.getName()));
                logger.finer("Adding WEKA attribute " + attrib.getName() + ":Numerical");
                break;
            }
        }
    } else {
        String[] tokens = attribute.getTokens();
        FastVector values = new FastVector(tokens.length);
        for (String token : tokens)
            values.addElement(token);
        attInfo.addElement(new weka.core.Attribute(attribute.getName(), values));
        logger.finer("Adding WEKA attribute " + attribute.getName() + ":Categorical");
    }

    for (Aggregator agg : aggregators) {
        Attribute attrib = agg.getAttribute();
        switch (agg.getType()) {
        case CATEGORICAL:
            String[] tokens = ((AttributeCategorical) attrib).getTokens();
            FastVector values = new FastVector(tokens.length);
            for (String token : tokens)
                values.addElement(token);
            attInfo.addElement(new weka.core.Attribute(agg.getName(), values));
            logger.finer("Adding WEKA attribute " + agg.getName() + ":Categorical");
            break;

        default:
            attInfo.addElement(new weka.core.Attribute(agg.getName()));
            logger.finer("Adding WEKA attribute " + agg.getName() + ":Numerical");
            break;
        }
    }

    Instances train = new Instances("train", attInfo, split.getTrainSetSize());
    train.setClassIndex(vectorClsIdx);

    for (Node node : split.getTrainSet()) {
        double[] v = new double[attInfo.size()];
        makeVector(node, v);
        train.add(new Instance(1, v));
    }
    try {
        classifier.buildClassifier(train);
    } catch (Exception e) {
        throw new RuntimeException("Failed to build classifier " + classifier.getClass().getName(), e);
    }
    testInstance = new Instance(1, tmpVector);
    testInstances = new Instances("test", attInfo, 1);
    testInstances.setClassIndex(vectorClsIdx);
    testInstances.add(testInstance);
    testInstance = testInstances.firstInstance();
}

From source file:news.classifier.WekaLearner.java

public double classifyInstance(double[] instance) throws Exception {
    wClassifier.buildClassifier(wTrainingSet);
    Instances ins = new Instances(wTrainingSet, 0);
    Instance row = new DenseInstance(1.0, instance);
    ins.add(row);
    return wClassifier.classifyInstance(ins.lastInstance());
}
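
Two things worth noting here. First, new Instances(wTrainingSet, 0) copies only the header of the training set (its attribute definitions, with zero rows), which guarantees that the freshly built row is structurally compatible with the model. Second, buildClassifier is re-run on every call, which is wasteful when the training set has not changed; training once and reusing the model is the usual pattern. A self-contained sketch of the header-copy idiom (names are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class HeaderCopyDemo {
    public static void main(String[] args) {
        ArrayList<Attribute> attInfo = new ArrayList<>();
        attInfo.add(new Attribute("x"));
        attInfo.add(new Attribute("y"));
        Instances train = new Instances("train", attInfo, 10);

        // Same attributes, zero rows: a header-only copy.
        Instances header = new Instances(train, 0);
        header.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));

        System.out.println(header.numInstances());      // prints 1
        System.out.println(header.equalHeaders(train)); // prints true
    }
}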

From source file:nl.uva.sne.commons.ClusterUtils.java

private static Instances createInstances(String inDir) throws Exception {
    List<Term> terms = dir2Terms(inDir);

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Create documents");

    List<List<String>> allDocs = new ArrayList<>();
    Map<String, List<String>> docs = new HashMap<>();
    for (Term tv : terms) {
        try {
            Set<String> doc = SemanticUtils.getDocument(tv);
            allDocs.add(new ArrayList<>(doc));
            docs.put(tv.getUID(), new ArrayList<>(doc));
        } catch (JWNLException ex) {
            Logger.getLogger(ClusterUtils.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Extract features");
    Set<String> allWords = new HashSet<>();
    Map<String, Map<String, Double>> featureVectors = new HashMap<>();
    for (String k : docs.keySet()) {
        List<String> doc = docs.get(k);
        Map<String, Double> featureVector = new TreeMap<>();
        for (String term : doc) {
            allWords.add(term);
            if (!featureVector.containsKey(term)) {
                double score = SemanticUtils.tfIdf(doc, allDocs, term);
                featureVector.put(term, score);
            }
        }
        featureVectors.put(k, featureVector);
    }

    //        for (String t : featureVectors.keySet()) {
    //            Map<String, Double> featureV = featureVectors.get(t);
    //            for (String word : allWords) {
    //                if (!featureV.containsKey(word)) {
    //                    featureV.put(word, 0.0);
    //                }
    //            }
    //            System.err.println(t+" "+featureV.size());
    //            featureVectors.put(t, featureV);
    //        }
    ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(new Attribute("UID", (ArrayList<String>) null));
    for (String t : allWords) {
        attributes.add(new Attribute(t));
    }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Create Instances");

    Instances data = new Instances("Rel", attributes, terms.size());

    for (String t : featureVectors.keySet()) {
        Map<String, Double> featureV = featureVectors.get(t);
        double[] vals = new double[data.numAttributes()];
        vals[0] = data.attribute(0).addStringValue(t);
        int index = 1;
        for (String w : featureV.keySet()) {
            vals[index] = featureV.get(w);
            index++;
        }
        data.add(new DenseInstance(1.0, vals));

    }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Normalize vectors");
    Normalize filter = new Normalize();
    filter.setInputFormat(data);
    data = Filter.useFilter(data, filter);
    return data;

}
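
The first attribute here is a string attribute: passing a null value list to the Attribute constructor creates one, and addStringValue() stores the text in the attribute's internal table and returns the numeric index to put in the instance's value array. One caveat about the loop above: it fills vals in featureV.keySet() iteration order, which is only correct if that order matches the order in which the word attributes were added; looking each attribute up by name would be more robust. A self-contained sketch of the string-attribute pattern (names are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class StringAttributeDemo {
    public static void main(String[] args) {
        ArrayList<Attribute> attInfo = new ArrayList<>();
        // A null value list creates a string attribute.
        attInfo.add(new Attribute("UID", (ArrayList<String>) null));
        attInfo.add(new Attribute("score"));

        Instances data = new Instances("demo", attInfo, 0);

        double[] vals = new double[data.numAttributes()];
        // addStringValue stores the text in the attribute's table and
        // returns the numeric index to place in the instance.
        vals[0] = data.attribute(0).addStringValue("doc-42");
        vals[1] = 0.87;
        data.add(new DenseInstance(1.0, vals));

        System.out.println(data.instance(0).stringValue(0)); // prints doc-42
    }
}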

From source file:nl.uva.sne.commons.ClusterUtils.java

private static Instances createInstancesWithClasses(String inDir)
        throws IOException, ParseException, Exception {

    File dir = new File(inDir);
    File[] classFolders = dir.listFiles();

    List<List<String>> allDocs = new ArrayList<>();
    Map<String, List<String>> docs = new HashMap<>();
    Set<String> classes = new HashSet<>();
    for (File f : classFolders) {
        if (f.isDirectory()) {
            List<Term> terms = dir2Terms(f.getAbsolutePath());
            classes.add(f.getName());
            for (Term tv : terms) {
                Set<String> doc = SemanticUtils.getDocument(tv);
                allDocs.add(new ArrayList<>(doc));
                docs.put(tv.getUID() + "," + f.getName(), new ArrayList<>(doc));
            }
        } else {
            List<Term> terms = new ArrayList<>();
            if (FilenameUtils.getExtension(f.getName()).endsWith("json")) {
                terms.add(TermFactory.create(new FileReader(f)));
            }
            classes.add("NON");
            for (Term tv : terms) {
                Set<String> doc = SemanticUtils.getDocument(tv);
                allDocs.add(new ArrayList<>(doc));
                docs.put(tv.getUID() + "," + "NON", new ArrayList<>(doc));
                //                    docs.put(tv.getUID(), new ArrayList<>(doc));
            }
        }
    }

    Set<String> allWords = new HashSet<>();
    Map<String, Map<String, Double>> featureVectors = new HashMap<>();
    for (String k : docs.keySet()) {
        List<String> doc = docs.get(k);
        Map<String, Double> featureVector = new TreeMap<>();
        for (String term : doc) {
            allWords.add(term);
            if (!featureVector.containsKey(term)) {
                double score = SemanticUtils.tfIdf(doc, allDocs, term);
                featureVector.put(term, score);
            }
        }
        featureVectors.put(k, featureVector);
    }

    for (String t : featureVectors.keySet()) {
        Map<String, Double> featureV = featureVectors.get(t);
        for (String word : allWords) {
            if (!featureV.containsKey(word)) {
                featureV.put(word, 0.0);
            }
        }
        //            System.err.println(t + " " + featureV.size());
        featureVectors.put(t, featureV);
    }
    ArrayList<Attribute> attributes = buildAttributes(allWords, classes);

    Instances data = new Instances("Rel", attributes, docs.size());
    data.setClassIndex(data.numAttributes() - 1);

    for (String t : featureVectors.keySet()) {
        String[] parts = t.split(",");
        String id = parts[0];
        String theClass = parts[parts.length - 1];
        int index = 0;
        double[] vals = new double[data.numAttributes()];
        vals[index] = data.attribute(0).addStringValue(id);
        index++;
        Map<String, Double> featureV = featureVectors.get(t);
        for (String w : featureV.keySet()) {
            vals[index] = featureV.get(w);
            index++;
        }
        DenseInstance inst = new DenseInstance(1.0, vals);
        inst.setDataset(data);
        inst.setClassValue(theClass);
        data.add(inst);
    }
    return data;

}
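
Unlike the other examples, this one labels each row before adding it. setClassValue(String) resolves the label against the class attribute, so the instance must already be attached to a dataset via setDataset() when it is called; add() then stores a copy of the labelled instance as usual. A self-contained sketch (names are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class ClassValueDemo {
    public static void main(String[] args) {
        ArrayList<String> labels = new ArrayList<>();
        labels.add("spam");
        labels.add("ham");

        ArrayList<Attribute> attInfo = new ArrayList<>();
        attInfo.add(new Attribute("x"));
        attInfo.add(new Attribute("class", labels));

        Instances data = new Instances("demo", attInfo, 0);
        data.setClassIndex(data.numAttributes() - 1);

        DenseInstance inst = new DenseInstance(1.0, new double[] { 0.5, 0.0 });
        // setClassValue(String) needs the dataset to resolve the label,
        // so the instance must be attached before the call.
        inst.setDataset(data);
        inst.setClassValue("ham");
        data.add(inst); // add() then stores a copy of the labelled instance

        System.out.println(data.instance(0).stringValue(data.classIndex())); // ham
    }
}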

From source file:nlpmusic.StringClusterer.java

public static Instances listLoad(ArrayList<String> list) {
    FastVector attributes = new FastVector();
    attributes.addElement(new Attribute("attr", (FastVector) null));
    Instances datas = new Instances("Strings", attributes, 0);

    for (String str : list) {
        DenseInstance inst = new DenseInstance(1);
        inst.setValue(datas.attribute(0), str);
        datas.add(inst);
    }
    return datas;
}

From source file:OAT.trading.classification.Weka.java

License:Open Source License

private Instances getInstances(List<TrainingSample> trainingSet) {
    Instances data = new Instances("trainingSet", attributes, 0);

    for (TrainingSample trainingSample : trainingSet) {
        double[] vars = Arrays.copyOf(trainingSample.getInputVector(), attributes.size());

        int classIndex = attributes.size() - 1;
        vars[classIndex] = (Double) trainingSample.getDesiredOutput() < 0.5 ? classes.indexOf("0")
                : classes.indexOf("1");

        data.add(new Instance(1.0, vars));
    }

    data.setClassIndex(attributes.size() - 1);

    return data;
}

From source file:OAT.trading.classification.Weka.java

License:Open Source License

private Instances getInstances(InputSample input) {
    Instances data = new Instances("inputSet", attributes, 0);

    double[] vars = Arrays.copyOf(input.getInputVector(), attributes.size());

    int classIndex = attributes.size() - 1;
    vars[classIndex] = classes.indexOf("0");

    data.add(new Instance(1.0, vars));

    data.setClassIndex(attributes.size() - 1);

    return data;
}
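
Both OAT.trading examples use the pre-3.7 Weka API, in which weka.core.Instance is a concrete class constructed from a weight and a value array. From Weka 3.7.1 on, Instance is an interface and DenseInstance takes that role; the nominal label is still stored as its index in the class attribute's value list. A 3.7+-style sketch of the same pattern (names are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class NominalTargetDemo {
    public static void main(String[] args) {
        ArrayList<String> classes = new ArrayList<>();
        classes.add("0");
        classes.add("1");

        ArrayList<Attribute> attInfo = new ArrayList<>();
        attInfo.add(new Attribute("f1"));
        attInfo.add(new Attribute("f2"));
        attInfo.add(new Attribute("class", classes));

        Instances data = new Instances("trainingSet", attInfo, 0);
        data.setClassIndex(attInfo.size() - 1);

        // The nominal label is encoded as its index in the class value list.
        double[] vars = { 0.3, 0.9, classes.indexOf("1") };
        data.add(new DenseInstance(1.0, vars));

        System.out.println(data.instance(0).stringValue(data.classIndex())); // 1
    }
}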

From source file:OnTheFlyMethods.FastImplementations.RedefinedWeightedNodePruning.java

License:Open Source License

protected boolean verifyValidEntities(int entityId, int xxx, List<AbstractBlock> newBlocks,
        ExecuteBlockComparisons ebc, Instances trainingInstances) {
    int index;
    retainedNeighbors.clear();
    if (!cleanCleanER) {
        //            for (int neighborId : validEntities) {
        //                if (isValidComparison(entityId, neighborId,ebc)) {
        //                    totalComparisons++;
        //                    duplicatePropagation.isSuperfluous(getComparison(entityId, neighborId));
        //                }
        //            }
    } else {
        if (entityId < datasetLimit) {
            //               //Iterator<Integer> temp = validEntitiesB.iterator();
            int size = validEntities.size();
            Iterator<Integer> it = validEntitiesB.iterator();
            for (int neighborId : validEntities) {

                Integer value = map.get(entityId);
                if (value != null && value == neighborId) {
                    // System.out.println("----");
                    continue;
                }

                value = map.get(neighborId);
                if (value != null && value == entityId) {
                    // System.out.println("----");
                    continue;
                }

                map.put(entityId, neighborId);

                // if(entityId==1178 && neighborId==2562)
                //     System.out.println("ok");
                //                   // index=temp.next();

                int blockIndex = it.next();
                if (isValidComparison(entityId, neighborId, ebc)) {
                    totalComparisons++;
                    duplicatePropagation.isSuperfluous(getComparison(entityId, neighborId));
                    //  if(apagar++%1000==0)
                    //     System.out.println(apagar);

                    Comparison c;
                    if (entityId < datasetLimit)
                        c = new Comparison(true, entityId, neighborId - datasetLimit);
                    else
                        c = new Comparison(true, entityId - datasetLimit, neighborId);
                    final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(blockIndex, c);
                    if (commonBlockIndices == null)
                        continue;
                    //                       if(!retainedEntitiesD1.contains(comparison.getEntityId1()))
                    //                          retainedEntitiesD1.add(comparison.getEntityId1());
                    //                       if(!retainedEntitiesD2.contains(comparison.getEntityId2()))
                    //                          retainedEntitiesD2.add(comparison.getEntityId2());
                    ////////////////////////////

                    //    if(c.getEntityId1()==1 && c.getEntityId2()==12088)
                    //     System.out.println();
                    double[] instanceValues = new double[8];

                    // int entityId2 = comparison.getEntityId2() + entityIndex.getDatasetLimit();

                    double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(c.getEntityId1(), 0));
                    double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(c.getEntityId2(), 1));

                    instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2;

                    double raccb = 0;
                    for (Integer index1 : commonBlockIndices) {
                        raccb += 1.0 / comparisonsPerBlock[index1];
                    }
                    if (raccb < 1.0E-6) {
                        raccb = 1.0E-6;
                    }
                    instanceValues[1] = raccb;

                    String temp = Integer.toString(entityId) + "00"
                            + Integer.toString(neighborId - datasetLimit);
                    instanceValues[2] = commonBlockIndices.size() / (redundantCPE[c.getEntityId1()]
                            + redundantCPE[c.getEntityId2()] - commonBlockIndices.size());
                    instanceValues[3] = nonRedundantCPE[c.getEntityId1()];
                    instanceValues[4] = nonRedundantCPE[c.getEntityId2()];
                    //      instanceValues[5] =   ebc.getSimilarityAttribute(c.getEntityId1(), c.getEntityId2());
                    instanceValues[5] = getWeight(entityId, neighborId, ebc);
                    instanceValues[6] = (Math.sqrt(
                            Math.pow(averageWeight[entityId], 2) + Math.pow(averageWeight[neighborId], 2)) / 4)
                            * getWeight(entityId, neighborId, ebc);

                    instanceValues[7] = adp.isSuperfluous(getComparison(entityId, neighborId)) ? 1 : 0;

                    Instance newInstance = new DenseInstance(1.0, instanceValues);
                    newInstance.setDataset(trainingInstances);
                    trainingInstances.add(newInstance);
                }
            }
        } else {
            Iterator<Integer> it = validEntitiesB.iterator();
            for (int neighborId : validEntities) {

                Integer value = map.get(entityId);
                if (value != null && value == neighborId) {
                    // System.out.println("----");
                    continue;
                }

                value = map.get(neighborId);
                if (value != null && value == entityId) {
                    // System.out.println("----");
                    continue;
                }

                map.put(entityId, neighborId);

                int blockIndex = it.next();
                //               (commented-out duplicate of the comparison block above, elided)

            }
        }
    }
    return false;
}