Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usages of the weka.core.Instance method setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:wekimini.DataGenerator.java

/**
 * Builds one row from the supplied values and appends it to {@code dummyInstances}.
 *
 * <p>The row gets the next id, the recording round and a timestamp obtained by formatting
 * the current time and parsing it back through the timestamp attribute; when that parse
 * fails the timestamp is set to 0 and the failure is logged. Output slots whose mask entry
 * is {@code false} are marked missing; for every other slot the data manager's per-output
 * example count is increased by one.
 *
 * @param inputs values copied into the row starting at {@code numMetaData}
 * @param outputs values copied into the row starting at {@code numMetaData + numInputs}
 * @param recordingMask one flag per output slot; {@code false} marks that slot as missing
 * @param recordingRound value stored in the recording-round column
 */
private void addToDummyInstances(double[] inputs, double[] outputs, boolean[] recordingMask,
        int recordingRound) {
    final int id = nextID++;

    double[] row = new double[numMetaData + numInputs + numOutputs];
    row[idIndex] = id;
    row[recordingRoundIndex] = recordingRound;

    // The formatted text is parsed back by the attribute rather than converted with
    // Double.parseDouble, which (per the original note) produced scientific notation.
    // NOTE(review): the attribute is taken from trainingInputs although the row is stored
    // in dummyInstances - confirm both share the same timestamp attribute.
    final String stampText = prettyDateFormat.format(new Date());
    try {
        row[timestampIndex] = trainingInputs.attribute(timestampIndex).parseDate(stampText);
    } catch (ParseException parseFailure) {
        row[timestampIndex] = 0;
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, parseFailure);
    }

    final int outputOffset = numMetaData + numInputs;
    System.arraycopy(inputs, 0, row, numMetaData, inputs.length); // TODO (carried over): double-check
    System.arraycopy(outputs, 0, row, outputOffset, outputs.length);

    Instance example = new Instance(1.0, row);
    int slot = 0;
    while (slot < recordingMask.length) {
        if (recordingMask[slot]) {
            w.getDataManager().setNumExamplesPerOutput(slot,
                    w.getDataManager().getNumExamplesPerOutput(slot) + 1);
        } else {
            example.setMissing(outputOffset + slot);
        }
        slot++;
    }
    example.setDataset(dummyInstances);
    dummyInstances.add(example);
}

From source file:wekimini.DataManager.java

/**
 * Builds one row from the supplied values, appends it to {@code allInstances} and notifies
 * listeners.
 *
 * <p>The row gets the next id, the recording round and a timestamp obtained by formatting
 * the current time and parsing it back through the timestamp attribute; when that parse
 * fails the timestamp is set to 0 and the failure is logged. Output slots whose mask entry
 * is {@code false} are marked missing; for every other slot the per-output example count
 * is increased by one.
 *
 * @param inputs values copied into the row starting at {@code numMetaData}
 * @param outputs values copied into the row starting at {@code numMetaData + numInputs}
 * @param recordingMask one flag per output slot; {@code false} marks that slot as missing
 * @param recordingRound value stored in the recording-round column
 */
public void addToTraining(double[] inputs, double[] outputs, boolean[] recordingMask, int recordingRound) {
    final int id = nextID++;

    double[] row = new double[numMetaData + numInputs + numOutputs];
    row[idIndex] = id;
    row[recordingRoundIndex] = recordingRound;

    // The formatted text is parsed back by the attribute rather than converted with
    // Double.parseDouble, which (per the original note) produced scientific notation.
    final String stampText = prettyDateFormat.format(new Date());
    try {
        row[timestampIndex] = allInstances.attribute(timestampIndex).parseDate(stampText);
    } catch (ParseException parseFailure) {
        row[timestampIndex] = 0;
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, parseFailure);
    }

    final int outputOffset = numMetaData + numInputs;
    System.arraycopy(inputs, 0, row, numMetaData, inputs.length); // TODO (carried over): double-check
    System.arraycopy(outputs, 0, row, outputOffset, outputs.length);

    Instance example = new Instance(1.0, row);
    int slot = 0;
    while (slot < recordingMask.length) {
        if (recordingMask[slot]) {
            setNumExamplesPerOutput(slot, getNumExamplesPerOutput(slot) + 1);
        } else {
            example.setMissing(outputOffset + slot);
        }
        slot++;
    }
    example.setDataset(allInstances);
    allInstances.add(example);

    setHasInstances(true);
    fireStateChanged();
}

From source file:wekimini.DataManager.java

/**
 * Puts the instances held in {@code deletedTrainingRound} back into {@code allInstances}.
 *
 * <p>Does nothing when no deleted round is held. For each restored instance the per-output
 * example count is increased for every output value that is not missing, and listeners are
 * notified after each instance is added. The held round is cleared afterwards, so a second
 * call has no effect.
 */
public void reAddDeletedTrainingRound() {
    if (deletedTrainingRound == null) {
        return;
    }

    for (Instance restored : deletedTrainingRound) {
        for (int output = 0; output < numOutputs; output++) {
            boolean hasValue = !restored.isMissing(numMetaData + numInputs + output);
            if (hasValue) {
                setNumExamplesPerOutput(output, getNumExamplesPerOutput(output) + 1);
            }
        }

        restored.setDataset(allInstances);
        allInstances.add(restored);
        setHasInstances(true);
        fireStateChanged();
    }

    // Allowing multiple re-adds could get interesting; deliberately not supported for now.
    deletedTrainingRound = null;
}

From source file:wekimini.InputGenerator.java

/**
 * Builds one row from the supplied values and appends it to {@code storedInputs}.
 *
 * <p>The row gets the next id, the recording round and a timestamp obtained by formatting
 * the current time and parsing it back through the timestamp attribute; when that parse
 * fails the timestamp is set to 0 and the failure is logged. Output slots whose mask entry
 * is {@code false} are marked missing; for every other slot the data manager's per-output
 * example count is increased by one.
 *
 * @param inputs values copied into the row starting at {@code numMetaData}
 * @param outputs values copied into the row starting at {@code numMetaData + numInputs}
 * @param recordingMask one flag per output slot; {@code false} marks that slot as missing
 * @param recordingRound value stored in the recording-round column
 */
public void addToStorage(double[] inputs, double[] outputs, boolean[] recordingMask, int recordingRound) {
    final int id = nextID++;

    double[] row = new double[numMetaData + numInputs + numOutputs];
    row[idIndex] = id;
    row[recordingRoundIndex] = recordingRound;

    // The formatted text is parsed back by the attribute rather than converted with
    // Double.parseDouble, which (per the original note) produced scientific notation.
    final String stampText = prettyDateFormat.format(new Date());
    try {
        row[timestampIndex] = storedInputs.attribute(timestampIndex).parseDate(stampText);
    } catch (ParseException parseFailure) {
        row[timestampIndex] = 0;
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, parseFailure);
    }

    final int outputOffset = numMetaData + numInputs;
    System.arraycopy(inputs, 0, row, numMetaData, inputs.length); // TODO (carried over): double-check
    System.arraycopy(outputs, 0, row, outputOffset, outputs.length);

    Instance example = new Instance(1.0, row);
    int slot = 0;
    while (slot < recordingMask.length) {
        if (recordingMask[slot]) {
            w.getDataManager().setNumExamplesPerOutput(slot,
                    w.getDataManager().getNumExamplesPerOutput(slot) + 1);
        } else {
            example.setMissing(outputOffset + slot);
        }
        slot++;
    }
    example.setDataset(storedInputs);
    storedInputs.add(example);
}

From source file:wekimini.InputGenerator.java

/**
 * Builds one row from the supplied values and appends it to {@code trainingInputs}.
 *
 * <p>The row gets the next id, the recording round and a timestamp obtained by formatting
 * the current time and parsing it back through the timestamp attribute; when that parse
 * fails the timestamp is set to 0 and the failure is logged. Output slots whose mask entry
 * is {@code false} are marked missing; for every other slot the data manager's per-output
 * example count is increased by one.
 *
 * @param inputs values copied into the row starting at {@code numMetaData}
 * @param outputs values copied into the row starting at {@code numMetaData + numInputs}
 * @param recordingMask one flag per output slot; {@code false} marks that slot as missing
 * @param recordingRound value stored in the recording-round column
 */
public void addToTraining(double[] inputs, double[] outputs, boolean[] recordingMask, int recordingRound) {
    final int id = nextID++;

    double[] row = new double[numMetaData + numInputs + numOutputs];
    row[idIndex] = id;
    row[recordingRoundIndex] = recordingRound;

    // The formatted text is parsed back by the attribute rather than converted with
    // Double.parseDouble, which (per the original note) produced scientific notation.
    final String stampText = prettyDateFormat.format(new Date());
    try {
        row[timestampIndex] = trainingInputs.attribute(timestampIndex).parseDate(stampText);
    } catch (ParseException parseFailure) {
        row[timestampIndex] = 0;
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, parseFailure);
    }

    final int outputOffset = numMetaData + numInputs;
    System.arraycopy(inputs, 0, row, numMetaData, inputs.length); // TODO (carried over): double-check
    System.arraycopy(outputs, 0, row, outputOffset, outputs.length);

    Instance example = new Instance(1.0, row);
    int slot = 0;
    while (slot < recordingMask.length) {
        if (recordingMask[slot]) {
            w.getDataManager().setNumExamplesPerOutput(slot,
                    w.getDataManager().getNumExamplesPerOutput(slot) + 1);
        } else {
            example.setMissing(outputOffset + slot);
        }
        slot++;
    }
    example.setDataset(trainingInputs);
    trainingInputs.add(example);
}

From source file:wekimini.OutputGenerator.java

/**
 * Builds one row from the supplied values and appends it to {@code storedOutputs}.
 *
 * <p>The row gets the next id, the recording round and a timestamp obtained by formatting
 * the current time and parsing it back through the timestamp attribute; when that parse
 * fails the timestamp is set to 0 and the failure is logged. Output slots whose mask entry
 * is {@code false} are marked missing; for every other slot the data manager's per-output
 * example count is increased by one.
 *
 * @param inputs values copied into the row starting at {@code numMetaData}
 * @param outputs values copied into the row starting at {@code numMetaData + numInputs}
 * @param recordingMask one flag per output slot; {@code false} marks that slot as missing
 * @param recordingRound value stored in the recording-round column
 */
public void addToStorage(double[] inputs, double[] outputs, boolean[] recordingMask, int recordingRound) {
    final int id = nextID++;

    double[] row = new double[numMetaData + numInputs + numOutputs];
    row[idIndex] = id;
    row[recordingRoundIndex] = recordingRound;

    // The formatted text is parsed back by the attribute rather than converted with
    // Double.parseDouble, which (per the original note) produced scientific notation.
    final String stampText = prettyDateFormat.format(new Date());
    try {
        row[timestampIndex] = storedOutputs.attribute(timestampIndex).parseDate(stampText);
    } catch (ParseException parseFailure) {
        row[timestampIndex] = 0;
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, parseFailure);
    }

    final int outputOffset = numMetaData + numInputs;
    System.arraycopy(inputs, 0, row, numMetaData, inputs.length); // TODO (carried over): double-check
    System.arraycopy(outputs, 0, row, outputOffset, outputs.length);

    Instance example = new Instance(1.0, row);
    int slot = 0;
    while (slot < recordingMask.length) {
        if (recordingMask[slot]) {
            w.getDataManager().setNumExamplesPerOutput(slot,
                    w.getDataManager().getNumExamplesPerOutput(slot) + 1);
        } else {
            example.setMissing(outputOffset + slot);
        }
        slot++;
    }
    example.setDataset(storedOutputs);
    storedOutputs.add(example);
}

From source file:wekimini.OutputGenerator.java

/**
 * Builds one row from the supplied values and appends it to {@code trainingOutputs}.
 *
 * <p>The row gets the next id, the recording round and a timestamp obtained by formatting
 * the current time and parsing it back through the timestamp attribute; when that parse
 * fails the timestamp is set to 0 and the failure is logged. Output slots whose mask entry
 * is {@code false} are marked missing; for every other slot the data manager's per-output
 * example count is increased by one.
 *
 * @param inputs values copied into the row starting at {@code numMetaData}
 * @param outputs values copied into the row starting at {@code numMetaData + numInputs}
 * @param recordingMask one flag per output slot; {@code false} marks that slot as missing
 * @param recordingRound value stored in the recording-round column
 */
public void addToTrainingOutputs(double[] inputs, double[] outputs, boolean[] recordingMask,
        int recordingRound) {
    final int id = nextID++;

    double[] row = new double[numMetaData + numInputs + numOutputs];
    row[idIndex] = id;
    row[recordingRoundIndex] = recordingRound;

    // The formatted text is parsed back by the attribute rather than converted with
    // Double.parseDouble, which (per the original note) produced scientific notation.
    final String stampText = prettyDateFormat.format(new Date());
    try {
        row[timestampIndex] = trainingOutputs.attribute(timestampIndex).parseDate(stampText);
    } catch (ParseException parseFailure) {
        row[timestampIndex] = 0;
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, parseFailure);
    }

    final int outputOffset = numMetaData + numInputs;
    System.arraycopy(inputs, 0, row, numMetaData, inputs.length); // TODO (carried over): double-check
    System.arraycopy(outputs, 0, row, outputOffset, outputs.length);

    Instance example = new Instance(1.0, row);
    int slot = 0;
    while (slot < recordingMask.length) {
        if (recordingMask[slot]) {
            w.getDataManager().setNumExamplesPerOutput(slot,
                    w.getDataManager().getNumExamplesPerOutput(slot) + 1);
        } else {
            example.setMissing(outputOffset + slot);
        }
        slot++;
    }
    example.setDataset(trainingOutputs);
    trainingOutputs.add(example);
}

From source file:wtute.engine.AnalysisEngine.java

/**
 * Trains a J48 classifier on the converted contents of {@code data} and writes the trained
 * model to the file {@code J48.model}.
 *
 * <p>Every instance of {@code data} is passed through {@code convertInstance}, attached to
 * a freshly created training set and added to it. A failure while building or serializing
 * the classifier is reported (including its cause) but not rethrown, so training stays
 * best-effort for callers.
 *
 * @throws Exception if creating the training set or converting an instance fails
 */
public void train() throws Exception {

    Instances trainingInstances = createInstances("TRAINING INS");
    for (int i = 0; i < data.numInstances(); i++) {
        Instance instance = convertInstance(data.instance(i));

        // The instance must reference the dataset it is added to.
        instance.setDataset(trainingInstances);
        trainingInstances.add(instance);
    }

    System.out.println(data);
    J48 classifier = new J48();

    try {
        //classifier training code
        classifier.buildClassifier(trainingInstances);

        //storing the trained classifier to a file for future use
        weka.core.SerializationHelper.write("J48.model", classifier);
    } catch (Exception ex) {
        System.out.println("Exception in training the classifier.");
        // Previously the cause was dropped entirely; surface it so failures can be diagnosed.
        System.err.println("Cause: " + ex);
    }
}

From source file:zhaop.textmining.proj.MyStringToWordVector.java

License:Open Source License

/**
 * Converts the instance w/o normalization and appends the converted instance to {@code v}.
 *
 * <p>Attributes outside the selected range are copied first and occupy output indices
 * {@code 0 .. firstCopy - 1}. The selected attributes are then tokenized; each token found
 * in the dictionary contributes a count (or a constant 1 when counts are not output) at its
 * dictionary index. The optional TF and IDF transforms are applied only to indices
 * {@code >= firstCopy}. The result is stored as a sparse instance attached to the output
 * format.
 *
 * @param instance the instance to convert
 * @param v the vector the converted instance is appended to
 * @return the number of attributes outside the selected range, i.e. the first output index
 *         that the TF/IDF transforms are applied to
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {

    // Convert the instance into a sorted set of indexes
    // (output attribute index -> value; the TreeMap keeps the indices ordered)
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                // (zero values are left out of the sparse map)
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {

                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            // Advance the output index for every non-selected attribute, stored or not.
            firstCopy++;
        }
    }

    // Tokenize every selected, non-missing attribute and record dictionary words.
    for (int j = 0; j < instance.numAttributes(); j++) {
        // if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

            m_Tokenizer.tokenize(instance.stringValue(j));

            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                // Words that are not in the dictionary are ignored.
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) { // Separate if here rather than two lines down
                                          // to avoid hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing TFTransform
    // (replaces each value at an index >= firstCopy with log(value + 1))
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        // The counter i is not used in this loop; the keys come from the map itself, so
        // put() only replaces the value of an existing key while iterating.
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Doing IDFTransform
    // (multiplies each value at an index >= firstCopy by
    // log(m_NumInstances / m_DocsCounts[index]))
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        // The counter i is not used in this loop either.
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to structures needed to create a sparse instance.
    // Here i is the position in the parallel values/indices arrays.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }

    // Keep the original weight and attach the new instance to the output format.
    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());

    v.addElement(inst);

    return firstCopy;
}