Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

In this page you can find the example usage for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * Returns a copy of x_template with x's attribute values copied in, its label
 * values set to missing, and its dataset reference set to D_template (the
 * dataset that x_template is a template of).
 * Useful when Weka throws an unexpected IndexOutOfBounds in setTemplate(x, Template).
 */
public static final Instance setTemplate(Instance x, Instance x_template, Instances D_template) {
    Instance result = (Instance) x_template.copy();
    int sourceClassIndex = x.classIndex();
    int targetClassIndex = D_template.classIndex();
    // Transfer the feature-space values of x into the copy.
    MLUtils.copyValues(result, x, sourceClassIndex, targetClassIndex);
    // Blank out all label values in the copy.
    MLUtils.setLabelsMissing(result, targetClassIndex);
    // Attach the copy to the template dataset.
    result.setDataset(D_template);
    return result;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Convert a multi-label instance into a multi-class instance, according to a template.
 */
public static Instance convertInstance(Instance x, int L, Instances template) {
    Instance converted = (Instance) x.copy();
    // Detach from the source dataset so attributes may be edited freely.
    converted.setDataset(null);
    // Strip the L label attributes off the front.
    for (int removed = 0; removed < L; removed++) {
        converted.deleteAttributeAt(0);
    }
    // Insert a single (class) attribute slot at the front, then attach the template.
    converted.insertAttributeAt(0);
    converted.setDataset(template);
    return converted;
}

From source file:MetaBlocking.EnhancedMetaBlocking.FastImplementations.RedefinedCardinalityNodePruning.java

License:Open Source License

/**
 * Keeps the top-k weighted comparisons between entityId and its valid
 * neighbours, stores them in nearestEntities[entityId], and appends one
 * 8-feature training instance per retained comparison to trainingInstances.
 *
 * @param entityId          id of the entity whose neighbourhood is pruned
 * @param trainingInstances dataset that receives the generated feature vectors
 */
protected void verifyValidEntities(int entityId, Instances trainingInstances) {
    if (validEntities.isEmpty()) {
        return;
    }

    topKEdges.clear();
    // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, not the
    // most negative value — comparisons with negative weights are skipped from
    // the start. If that is unintended, use -Double.MAX_VALUE instead; kept
    // as-is to preserve behavior.
    minimumWeight = Double.MIN_VALUE;
    Iterator<Integer> it = validEntitiesNeighbor.iterator();
    for (int neighborId : validEntities) {
        double weight = getWeight(entityId, neighborId);
        int blockId = it.next(); // block ids run parallel to validEntities

        if (weight < minimumWeight) {
            continue;
        }

        Comparison comparison = getComparison(entityId, neighborId);
        comparison.setUtilityMeasure(weight);
        comparison.blockId = blockId;
        topKEdges.add(comparison);
        // Keep only the `threshold` best edges; the evicted minimum raises the bar.
        if (threshold < topKEdges.size()) {
            Comparison lastComparison = topKEdges.poll();
            minimumWeight = lastComparison.getUtilityMeasure();
        }
    }

    nearestEntities[entityId] = new HashSet<Comparison>(topKEdges);
    Iterator<Comparison> itb = nearestEntities[entityId].iterator();
    while (itb.hasNext()) {
        Comparison c = itb.next();
        int neighborId = c.getEntityId1() == entityId ? c.getEntityId2() : c.getEntityId1();
        int neighborId_clean = neighborId;
        if (cleanCleanER && entityId < datasetLimit) {
            // Shift into the second dataset's id space for clean-clean ER.
            neighborId += datasetLimit;
        }

        Comparison comp = new Comparison(true, entityId, neighborId_clean);

        final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(c.blockId, comp);
        if (commonBlockIndices == null) {
            continue;
        }

        double[] instanceValues = new double[8];

        // Inverse block frequencies of the two entities.
        // NOTE(review): if noOfBlocks and getNoOfEntityBlocks(..) are both
        // integer-typed, these divisions truncate before Math.log — confirm.
        double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(entityId, 0));
        double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(neighborId, 0));

        // [0] co-occurrence count weighted by both inverse block frequencies
        instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2;

        double raccb = 0;
        for (Integer index1 : commonBlockIndices) {
            raccb += 1.0 / comparisonsPerBlock[index1];
        }
        if (raccb < 1.0E-6) {
            raccb = 1.0E-6; // floor to avoid a zero/denormal feature value
        }
        instanceValues[1] = raccb;
        // [2] Jaccard-style overlap of the two entities' block sets
        instanceValues[2] = commonBlockIndices.size()
                / (redundantCPE[entityId] + redundantCPE[neighborId] - commonBlockIndices.size());
        instanceValues[3] = nonRedundantCPE[entityId];
        instanceValues[4] = nonRedundantCPE[neighborId];
        instanceValues[5] = neighborId;
        instanceValues[6] = entityId;
        // [7] class label: 0 when the comparison is superfluous, 1 otherwise
        instanceValues[7] = adp.isSuperfluous(c) ? 0 : 1;

        Instance newInstance = new DenseInstance(1.0, instanceValues);
        newInstance.setDataset(trainingInstances);
        trainingInstances.add(newInstance);
    }

}

From source file:milk.classifiers.MIBoost.java

License:Open Source License

/**
  * Builds the MIBoost classifier.
  *
  * Flattens the exemplars (bags) into one weighted dataset, then runs up to
  * m_MaxIterations boosting rounds: each round fits the base classifier,
  * measures per-bag weighted 0-1 error, numerically solves for the model
  * weight c (stored in m_Beta[m]), and reweights the bags and instances.
  *
  * @param exps the training exemplars (bags) used for generating the
  * boosted classifier.
  * @exception Exception if the classifier could not be built successfully
  */
 public void buildClassifier(Exemplars exps) throws Exception {

     Exemplars train = new Exemplars(exps);

     // Preconditions: nominal (at most binary) class, no string attributes,
     // and a base classifier that can handle instance weights.
     if (train.classAttribute().type() != Attribute.NOMINAL) {
         throw new Exception("Class attribute must be nominal.");
     }
     if (train.checkForStringAttributes()) {
         throw new Exception("Can't handle string attributes!");
     }

     m_ClassIndex = train.classIndex();
     m_IdIndex = train.idIndex();
     m_NumClasses = train.numClasses();
     m_NumIterations = m_MaxIterations;

     if (m_NumClasses > 2) {
         throw new Exception("Not yet prepared to deal with multiple classes!");
     }

     if (m_Classifier == null)
         throw new Exception("A base classifier has not been specified!");
     if (!(m_Classifier instanceof WeightedInstancesHandler))
         throw new Exception("Base classifier cannot handle weighted instances!");

     m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
     if (m_Debug)
         System.err.println("Base classifier: " + m_Classifier.getClass().getName());

     m_Beta = new double[m_NumIterations]; // per-iteration model weight c
     m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

     double N = (double) train.numExemplars(), sumNi = 0;
     Instances data = new Instances(m_Attributes, 0);// Data to learn a model
     data.deleteAttributeAt(m_IdIndex);// ID attribute useless
     Instances dataset = new Instances(data, 0); // empty header for copied instances

     // sumNi = total number of instances across all bags.
     for (int i = 0; i < N; i++)
         sumNi += train.exemplar(i).getInstances().numInstances();

     // Flatten the bags into `data`: each bag gets total weight sumNi/N,
     // split evenly among its ni instances.
     for (int i = 0; i < N; i++) {
         Exemplar exi = train.exemplar(i);
         exi.setWeight(sumNi / N);
         Instances insts = exi.getInstances();
         double ni = (double) insts.numInstances();
         for (int j = 0; j < ni; j++) {
             Instance ins = new Instance(insts.instance(j));// Copy

             ins.deleteAttributeAt(m_IdIndex);
             ins.setDataset(dataset);
             ins.setWeight(exi.weight() / ni);
             data.add(ins);
         }
     }

     // Assume the order of the instances is preserved in the Discretize filter
     if (m_DiscretizeBin > 0) {
         m_Filter = new Discretize();
         m_Filter.setInputFormat(new Instances(data, 0));
         m_Filter.setBins(m_DiscretizeBin);
         data = Filter.useFilter(data, m_Filter);
     }

     // Main boosting loop
     int dataIdx;
     iterations: for (int m = 0; m < m_MaxIterations; m++) {
         if (m_Debug)
             System.err.println("\nIteration " + m);
         // Build a model
         m_Models[m].buildClassifier(data);

         // Per-bag weighted error of the new model; instances of a bag are
         // contiguous in `data`, so dataIdx walks them in order.
         double[] err = new double[(int) N], weights = new double[(int) N];
         boolean perfect = true, tooWrong = true;
         dataIdx = 0;
         for (int n = 0; n < N; n++) {
             Exemplar exn = train.exemplar(n);
             // Fraction of misclassified instances within this bag
             double nn = (double) exn.getInstances().numInstances();
             for (int p = 0; p < nn; p++) {
                 Instance testIns = data.instance(dataIdx++);
                 if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue()) // Weighted instance-wise 0-1 errors
                     err[n]++;
             }
             weights[n] = exn.weight();
             err[n] /= nn;
             if (err[n] > 0.5)
                 perfect = false; // at least one bag is worse than chance
             if (err[n] < 0.5)
                 tooWrong = false; // at least one bag is better than chance
         }

         if (perfect || tooWrong) { // No or 100% classification error, cannot find beta
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("No errors");
             break iterations;
         }

         // Numerically solve for the model weight c (single unbounded variable;
         // NaN bounds mean "no constraint" to the optimizer).
         double[] x = new double[1];
         x[0] = 0;
         double[][] b = new double[2][x.length];
         b[0][0] = Double.NaN;
         b[1][0] = Double.NaN;

         OptEng opt = new OptEng();
         opt.setWeights(weights);
         opt.setErrs(err);
         if (m_Debug)
             System.out.println("Start searching for c... ");
         x = opt.findArgmin(x, b);
         while (x == null) {
             // findArgmin returns null when it hit its iteration cap;
             // resume from the best point found so far.
             x = opt.getVarbValues();
             if (m_Debug)
                 System.out.println("200 iterations finished, not enough!");
             x = opt.findArgmin(x, b);
         }
         if (m_Debug)
             System.out.println("Finished.");
         m_Beta[m] = x[0];

         if (m_Debug)
             System.err.println("c = " + m_Beta[m]);

         // Stop if error too small or error too big and ignore this model
         if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("Errors out of range!");
             break iterations;
         }

         // Update bag weights: up-weight bags the model got wrong.
         dataIdx = 0;
         double totWeights = 0;
         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
             totWeights += exr.weight();
         }

         if (m_Debug)
             System.err.println("Total weights = " + totWeights);

         // Renormalize bag weights (total mass stays sumNi) and push each
         // bag's weight down to its instances.
         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             double num = (double) exr.getInstances().numInstances();
             exr.setWeight(sumNi * exr.weight() / totWeights);
             for (int s = 0; s < num; s++) {
                 Instance inss = data.instance(dataIdx);
                 inss.setWeight(exr.weight() / num);
                 if (Double.isNaN(inss.weight()))
                     throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                 dataIdx++;
             }
         }
     }
 }

From source file:milk.classifiers.MIWrapper.java

License:Open Source License

/**
 * Flattens a set of exemplars (bags) into a single Instances object: the
 * bag-id attribute is dropped and every instance is weighted so that each
 * bag contributes equally (weight sumNi / (N * ni) per instance).
 */
public Instances transform(Exemplars train) throws Exception {

     Instances data = new Instances(m_Attributes); // flat dataset under construction
     data.deleteAttributeAt(m_IdIndex); // the bag-id attribute is not a feature
     Instances dataset = new Instances(data, 0); // empty header to attach copies to
     double sumNi = 0; // total number of instances over all bags
     double N = train.numExemplars(); // number of bags

     for (int i = 0; i < N; i++) {
         sumNi += train.exemplar(i).getInstances().numInstances();
     }

     for (int i = 0; i < N; i++) {
         Exemplar bag = train.exemplar(i);
         Instances bagInsts = bag.getInstances();
         double ni = (double) bagInsts.numInstances();
         for (int j = 0; j < ni; j++) {
             Instance copy = new Instance(bagInsts.instance(j));
             copy.deleteAttributeAt(m_IdIndex);
             copy.setDataset(dataset);
             // Equal total weight per bag, shared among its instances.
             copy.setWeight(sumNi / (N * ni));
             data.add(copy);
         }
     }

     return data;
 }

From source file:milk.classifiers.SimpleMI.java

License:Open Source License

/**
 * Collapses each exemplar (bag) into a single summary instance: every
 * feature attribute becomes either its mean/mode (m_TransformMethod == 1)
 * or the midpoint of its min and max; the bag's class value is preserved.
 */
public Instances transform(Exemplars train) throws Exception {

     Instances data = new Instances(m_Attributes);
     data.deleteAttributeAt(m_IdIndex); // drop the useless bag-id attribute
     Instances dataset = new Instances(data, 0);
     Instance template = new Instance(dataset.numAttributes());
     template.setDataset(dataset);
     double N = train.numExemplars(); // number of bags

     for (int i = 0; i < N; i++) {
         Exemplar bag = train.exemplar(i);
         Instances bagInsts = bag.getInstances();
         Instance summary = new Instance(template);
         summary.setDataset(dataset);
         int attIdx = 0;
         for (int j = 0; j < bagInsts.numAttributes(); j++) {
             // Only feature attributes are summarised.
             if (j == m_IdIndex || j == m_ClassIndex) {
                 continue;
             }
             double value;
             if (m_TransformMethod == 1) {
                 value = bagInsts.meanOrMode(j);
             } else {
                 double[] bounds = minimax(bagInsts, j);
                 value = (bounds[0] + bounds[1]) / 2.0;
             }
             summary.setValue(attIdx++, value);
         }
         summary.setClassValue(bag.classValue());
         data.add(summary);
     }

     return data;
 }

From source file:mlflex.learners.WekaLearner.java

License:Open Source License

/**
 * Builds a two-attribute Weka instance holding a prediction and the
 * corresponding dependent-variable value, attached to wekaInstances.
 */
private static Instance GetInstance(Instances wekaInstances, FastVector wekaAttributeVector,
        Prediction prediction) throws Exception {
    Instance result = new Instance(wekaAttributeVector.size());
    result.setDataset(wekaInstances);

    Attribute predictedAtt = (Attribute) wekaAttributeVector.elementAt(0);
    Attribute actualAtt = (Attribute) wekaAttributeVector.elementAt(1);
    result.setValue(predictedAtt, prediction.Prediction);
    result.setValue(actualAtt, prediction.DependentVariableValue);

    return result;
}

From source file:mlflex.WekaInMemoryLearner.java

License:Open Source License

/**
 * Builds a Weka instance for dataInstance: one value per feature attribute
 * (all but the last entry of attVector) and, when dependentVariableInstances
 * is supplied, the dependent-variable value in the final attribute.
 */
private static Instance GetInstance(Instances wekaInstances, FastVector attVector, DataValues dataInstance,
        DataInstanceCollection dependentVariableInstances) throws Exception {
    Instance result = new Instance(attVector.size());
    result.setDataset(wekaInstances);

    int featureCount = attVector.size() - 1; // last attribute holds the dependent variable
    for (int i = 0; i < featureCount; i++) {
        Attribute attribute = (Attribute) attVector.elementAt(i);
        String dataPointValue = dataInstance.GetDataPointValue(attribute.name());
        SetAttributeValue(result, attribute, dataPointValue);
    }

    if (dependentVariableInstances != null) {
        Attribute classAttribute = (Attribute) attVector.elementAt(featureCount);
        String classValue = dependentVariableInstances.Get(dataInstance.GetID())
                .GetDataPointValue(Utilities.ProcessorVault.DependentVariableDataProcessor
                        .GetDependentVariableDataPointName());
        SetAttributeValue(result, classAttribute, classValue);
    }

    return result;
}

From source file:moa.classifiers.macros.TACNB.java

License:Open Source License

/**
 * Prepends the cached old label values to the instance's attribute vector
 * and returns a new instance attached to the extended header. Returns the
 * input unchanged when no old labels are cached.
 */
public Instance extendWithOldLabels(Instance instance) {
    if (this.header == null) {
        initHeader(instance.dataset());
    }
    int numLabels = this.oldLabels.length;
    if (numLabels == 0) {
        return instance;
    }
    double[] original = instance.toDoubleArray();
    // Layout of the extended vector: [oldLabels..., original values...]
    double[] extended = new double[numLabels + original.length];
    System.arraycopy(this.oldLabels, 0, extended, 0, numLabels);
    System.arraycopy(original, 0, extended, numLabels, original.length);
    Instance extendedInstance = new DenseInstance(instance.weight(), extended);
    extendedInstance.setDataset(this.header);
    return extendedInstance;
}

From source file:moa.classifiers.meta.RandomRules.java

License:Open Source License

/**
 * Projects inst onto the random attribute subset assigned to the given
 * ensemble member. On first use, lazily draws this.numAttributes distinct
 * random attribute indices per ensemble member and builds the corresponding
 * reduced headers.
 *
 * @param inst            the full instance to project
 * @param classifierIndex which ensemble member's attribute subset to use
 * @return a new instance over the reduced attribute set with the class value copied
 */
private Instance transformInstance(Instance inst, int classifierIndex) {
    if (this.listAttributes == null) {
        this.numAttributes = (int) (this.numAttributesPercentageOption.getValue() * inst.numAttributes()
                / 100.0);
        this.listAttributes = new int[this.numAttributes][this.ensemble.length];
        this.dataset = new InstancesHeader[this.ensemble.length];
        for (int ensembleIndex = 0; ensembleIndex < this.ensemble.length; ensembleIndex++) {
            // Sample numAttributes distinct attribute indices for this member.
            // NOTE(review): nextInt(numAttributes() - 1) assumes the class
            // attribute is the last attribute — confirm for the streams used.
            for (int attributeIndex = 0; attributeIndex < this.numAttributes; attributeIndex++) {
                boolean isUnique = false;
                while (!isUnique) {
                    this.listAttributes[attributeIndex][ensembleIndex] = this.classifierRandom
                            .nextInt(inst.numAttributes() - 1);
                    isUnique = true;
                    for (int k = 0; k < attributeIndex; k++) {
                        if (this.listAttributes[attributeIndex][ensembleIndex] == this.listAttributes[k][ensembleIndex]) {
                            isUnique = false;
                            break;
                        }
                    }
                }
            }
            // Build the reduced header: the chosen attributes plus the class attribute.
            FastVector attributes = new FastVector();
            for (int attributeIndex = 0; attributeIndex < this.numAttributes; attributeIndex++) {
                attributes.addElement(inst.attribute(this.listAttributes[attributeIndex][ensembleIndex]));
            }
            attributes.addElement(inst.classAttribute());
            this.dataset[ensembleIndex] = new InstancesHeader(
                    new Instances(getCLICreationString(InstanceStream.class), attributes, 0));
            this.dataset[ensembleIndex].setClassIndex(this.numAttributes);
            this.ensemble[ensembleIndex].setModelContext(this.dataset[ensembleIndex]);
        }
    }
    // Copy the selected attribute values; the last slot is reserved for the class.
    double[] attVals = new double[this.numAttributes + 1];
    for (int attributeIndex = 0; attributeIndex < this.numAttributes; attributeIndex++) {
        attVals[attributeIndex] = inst.value(this.listAttributes[attributeIndex][classifierIndex]);
    }
    Instance instance = new DenseInstance(1.0, attVals);
    instance.setDataset(dataset[classifierIndex]);
    instance.setClassValue(inst.classValue());
    return instance;
}