Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

On this page you can find example usage for weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Document

Sets the value of a nominal or string attribute to the given value.
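
To show the call in isolation, here is a minimal, self-contained sketch. It is not taken from the sources listed below and assumes the Weka 3.6-era FastVector API that most of them use; for a nominal attribute, setValue(Attribute, String) resolves the label to its index and stores that index as the instance's internal value, throwing an IllegalArgumentException for an undefined label.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class SetValueSketch {
    public static void main(String[] args) {
        // A single nominal attribute with two labels.
        FastVector labels = new FastVector();
        labels.addElement("yes");
        labels.addElement("no");
        Attribute classAttr = new Attribute("class", labels);

        FastVector atts = new FastVector();
        atts.addElement(classAttr);
        Instances data = new Instances("demo", atts, 1);
        data.setClassIndex(0);

        Instance inst = new Instance(1);
        inst.setDataset(data);
        // Resolves "yes" to its label index and stores it internally.
        inst.setValue(classAttr, "yes");
        data.add(inst);
        System.out.println(data);
    }
}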

Usage

From source file:ocr.ARFFSymbolFilter.java

License:Apache License

public static void writeWeka(final String filenameout, final ArrayList<?> symbolData) {
    final int nsold = ARFFSymbolFilter.ns;
    ARFFSymbolFilter.tangent = (ARFFSymbolFilter.times > 1);
    try {
        if (!ARFFSymbolFilter.strokenumber) {
            ARFFSymbolFilter.ns = 1;
        }
        final DataOutputStream[] fileout = new DataOutputStream[ARFFSymbolFilter.ns];
        final Instances[] instances = new Instances[ARFFSymbolFilter.ns];
        System.out.println("Writing file");
        for (int i = 0; i < ARFFSymbolFilter.ns; ++i) {
            final int k = ARFFSymbolFilter.strokenumber ? i : (nsold - 1);
            fileout[ARFFSymbolFilter.strokenumber ? i : 0] = new DataOutputStream(new FileOutputStream(
                    filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#"));
        }
        final int tot = symbolData.size();
        for (int j = 0; j < symbolData.size(); ++j) {
            final ArrayList<?> group = (ArrayList<?>) symbolData.get(j);
            for (int i = 0; i < group.size(); ++i) {
                final Symbol sym = (Symbol) group.get(i);
                final int k = ARFFSymbolFilter.strokenumber ? (sym.size() - 1) : 0;
                if (sym.name.equals("no_name") || sym.name.equals("empty_symbol")) {
                    System.out.print("#" + sym.name + "#");
                } else {
                    for (int t = 0; t < ARFFSymbolFilter.times; ++t) {
                        final String line = constructStringInstance(sym, ARFFSymbolFilter.alpha);
                        if (line == null) {
                            System.out.print("line=null!");
                        } else {
                            if (instances[k] == null) {
                                final StringTokenizer st = new StringTokenizer(line, " ");
                                final int nt = st.countTokens() / 2;
                                final FastVector att = new FastVector();
                                for (int kk = 0; kk < nt; ++kk) {
                                    final String token = st.nextToken();
                                    att.addElement(new Attribute(token));
                                    st.nextToken();
                                }
                                att.addElement(new Attribute("class", (FastVector) null));
                                instances[k] = new Instances("Symbols of Size " + (k + 1), att, 1);
                                instances[k].setClassIndex(att.size() - 1);
                            }
                            final StringTokenizer st = new StringTokenizer(line, " ");
                            final int nt = st.countTokens() / 2;
                            final Instance inst = new Instance(nt + 1);
                            for (int kk = 0; kk < nt; ++kk) {
                                st.nextToken();
                                final String token = st.nextToken();
                                inst.setValue(kk, Double.parseDouble(token));
                            }
                            inst.setDataset(instances[k]);
                            inst.setClassValue(oldReplace(sym.name, "\\", ""));
                            instances[k].add(inst);
                        }
                    }
                }
            }
            // Print progress roughly every 10%.
            if ((int) (100.0 * j) / tot % 10 == 0) {
                System.out.print((int) (100.0 * j) / tot + "%-");
            }
        }
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            if (fileout[ARFFSymbolFilter.strokenumber ? k : 0] == null) {
                System.out.println("fo" + fileout[ARFFSymbolFilter.strokenumber ? k : 0]);
            }
            if (instances[ARFFSymbolFilter.strokenumber ? k : 0] == null) {
                System.out.println("in:" + instances[ARFFSymbolFilter.strokenumber ? k : 0]);
            }
            fileout[ARFFSymbolFilter.strokenumber ? k : 0]
                    .writeBytes(instances[ARFFSymbolFilter.strokenumber ? k : 0].toString());
            fileout[ARFFSymbolFilter.strokenumber ? k : 0].close();
        }
        final StringToNominal filter = new StringToNominal();
        final String[] args = new String[4];
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            args[0] = "-i";
            args[1] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#";
            args[2] = "-o";
            args[3] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff";
            Filter.filterFile(filter, args);
            new File(args[1]).delete();
        }
        System.out.println("100.0%");
    } catch (FileNotFoundException fnfe) {
        fnfe.printStackTrace();
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}

From source file:org.conqat.engine.commons.machine_learning.InstanceCreatorBase.java

License:Apache License

/** Creates a weka instance for the given classification object. */
public weka.core.Instance createWekaInstance(T classificationObject, LABEL label) {

    // create empty instance. The size of the instance corresponds to the
    // number of features plus the class label.
    weka.core.Instance instance = new weka.core.Instance(features.size() + 1);

    // calculate features
    for (int i = 0; i < features.size(); i++) {
        instance.setValue((Attribute) attributes.elementAt(i), features.get(i).getValue(classificationObject));
    }

    // set instance label
    Attribute classAttribute = (Attribute) attributes.elementAt(attributes.size() - 1);
    instance.setValue(classAttribute, label.name());

    return instance;

}

From source file:org.dkpro.similarity.ml.filters.LogFilter.java

License:Open Source License

@Override
protected Instance process(Instance inst) throws Exception {
    Instance newInst = new DenseInstance(inst.numAttributes());

    newInst.setValue(0, inst.value(0));

    for (int i = 1; i < inst.numAttributes() - 1; i++) {
        double newVal = Math.log(inst.value(i) + 1);
        // double newVal = inst.value(i);               // Passthrough

        newInst.setValue(i, newVal);
    }

    newInst.setValue(inst.numAttributes() - 1, inst.value(inst.numAttributes() - 1));

    return newInst;
}
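
A filter like this is normally applied through the standard Weka filter API, as in the sketch below (hypothetical usage, not part of the source; it assumes LogFilter is a regular weka.filters.Filter subclass and that data is an existing Instances object):

LogFilter filter = new LogFilter();
filter.setInputFormat(data);
Instances logScaled = Filter.useFilter(data, filter);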

From source file:org.gc4mir.core.Core.java

public void addArffClasses() throws IOException, Exception {
    //opens classless arff
    BufferedReader reader = new BufferedReader(new FileReader("export/autoweka/clean.arff"));
    Instances data = new Instances(reader);
    reader.close();

    //add classes to file.

    //creates filter to add attribute
    Add filter;
    filter = new Add();
    filter.setAttributeIndex("last");

    //get category names to set in the header
    String nominalLabels = "";
    for (int i = 0; i < getBaseCategories().size(); i++) {

        Category current = getBaseCategories().getCategories().get(i);
        nominalLabels += current.getId();
        if (i < getBaseCategories().size() - 1) {
            nominalLabels += ",";
        }
    }
    System.out.println("");
    filter.setNominalLabels(nominalLabels);
    filter.setAttributeName("Category");
    filter.setInputFormat(data);
    data = Filter.useFilter(data, filter);

    //add class values to each instance
    int classindex = data.numAttributes() - 1;

    for (int i = 0, n = 0; i < getBaseCategories().size(); i++) {
        Category current = getBaseCategories().getCategories().get(i);
        for (int k = 0; k < current.getRecordings().size(); k++) {
            Instance inst = data.instance(n);
            //inst.setDataset(null);
            inst.setValue(classindex, current.getId());
            //inst.setDataset(data);
            n++;
        }

        //inst.insertAttributeAt();
    }

    //saves arff file
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(new File("export/autoweka/train.arff"));
    saver.writeBatch();

    System.out.println("File sucesfully saved, you can continue now");

}

From source file:org.hypknowsys.wumprep.WUMprepWrapper.java

License:Open Source License

/**
 * Creates a dummy dataset from the input format, sends it to the script and
 * reads the script output's ARFF information that in turn is used to set
 * <code>this</code>' output format.
 *
 * This mechanism allows a WUMprep script to alter the recordset layout as
 * long as this change is documented by the output ARFF header. For example,
 * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field
 * to <code>host_dns</code> when performing IP lookups.
 * 
 * @param instanceInfo
 *          The input format.
 * @return Object containing the output instance structure.
 */
public Instances getScriptOutputFormat(Instances instanceInfo) {
    Instances outputFormat = instanceInfo;
    Instances testData = new Instances(instanceInfo);
    Instance testInstance = new Instance(testData.numAttributes());

    testData.delete();
    testInstance.setDataset(testData);

    // Initialize the testInstance's attribute values
    for (int i = 0; i < testInstance.numAttributes(); i++) {
        String aName = testInstance.attribute(i).name();
        if (aName.equals("host_ip"))
            testInstance.setValue(i, "127.0.0.1");
        else if (aName.equals("ts_day"))
            testInstance.setValue(i, "01");
        else if (aName.equals("ts_month"))
            testInstance.setValue(i, "Jan");
        else if (aName.equals("ts_year"))
            testInstance.setValue(i, "2005");
        else if (aName.equals("ts_hour"))
            testInstance.setValue(i, "11");
        else if (aName.equals("ts_minutes"))
            testInstance.setValue(i, "55");
        else if (aName.equals("ts_seconds"))
            testInstance.setValue(i, "00");
        else if (aName.equals("tz"))
            testInstance.setValue(i, "+0200");
        else
            testInstance.setValue(i, aName + "-dummy");
    }

    testData.add(testInstance);

    WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args);
    testWrapper.start();
    testWrapper.push(testData.toString());
    testWrapper.push((Instance) null);

    class ErrorReader extends Thread implements Serializable {
        /**  */
        private static final long serialVersionUID = -488779846603045891L;
        PipedReader m_input = null;

        /**
         * Helper class for reading stderr output from the WUMprep script
         * 
         * @param input The script's wrapper's stderr pipe reader
         */
        ErrorReader(PipedReader input) {
            m_input = input;
            this.start();
        }

        public void run() {
            try {
                while (m_input.read() >= 0)
                    ;
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    // read the stderr output
    new ErrorReader(testWrapper.getErrorPipe());

    try {
        // ignore the stderr output
        outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe());

    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    return outputFormat;
}

From source file:org.iobserve.analysis.behavior.clustering.hierarchical.ElbowMethod.java

License:Apache License

/**
 * Calculates the within-cluster sum-of-squares (WSS) for a given cluster.
 *
 * @param cluster
 *            Calculate the WSS for this cluster.
 * @return WSS
 **/
public double calcWSS(final List<Integer> cluster) {

    final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction();
    final double[] sumAttValues = new double[this.instances.numAttributes()];
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        // Sum up all values of all instances.
        for (int j = 0; j < this.instances.numAttributes(); j++) {
            sumAttValues[j] += instance.value(j);
        }
    }
    // Get average value of each attribute value.
    for (int j = 0; j < sumAttValues.length; j++) {
        sumAttValues[j] /= cluster.size();
    }

    /*
     * Create a centroid of this cluster by setting the average attributes of this cluster as
     * its own.
     */
    final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy();
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        centroid.setValue(j, sumAttValues[j]);
    }
    // Sum up distances of each data point in cluster to centroid to get WSS.
    double clusterWSS = 0.0;
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        clusterWSS += Math.pow(distanceFunction.distance(centroid, instance), 2);
    }
    return clusterWSS;
}

From source file:org.iobserve.analysis.behavior.clustering.hierarchical.GapStatisticMethod.java

License:Apache License

/**
 * Calculates the error sum-of-squares (ESS) for a given cluster.
 *
 * @param cluster
 *            Calculate the ESS for this cluster
 * @return ESS
 **/
public double calcESS(final List<Integer> cluster) {

    if (cluster.size() <= 1) {
        return 0.0;
    }
    final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction();
    final double[] sumAttValues = new double[this.instances.numAttributes()];
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        // Sum up all values of all instances.
        for (int j = 0; j < this.instances.numAttributes(); j++) {
            sumAttValues[j] += instance.value(j);
        }
    }
    // Get average value of each attribute value.
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        sumAttValues[j] /= cluster.size();
    }

    /*
     * Create a centroid of this cluster by setting the average attributes of this cluster as
     * its own.
     */
    final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy();
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        centroid.setValue(j, sumAttValues[j]);
    }
    // Sum up distances of each data point in cluster to centroid to get ESS.
    double clusterESS = 0.0;
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        clusterESS += distanceFunction.distance(centroid, instance);
    }
    return clusterESS / cluster.size();
}

From source file:org.iobserve.analysis.behavior.karlsruhe.AbstractClustering.java

License:Apache License

/**
 * Transforms the user sessions (in the form of counts of their called operation
 * signatures) into Weka instances that can be used for the clustering.
 *
 * @param countModel
 *            contains the userSessions in form of counts of called operation signatures
 * @param listOfDistinctOperationSignatures
 *            contains the extracted distinct operation signatures of the input
 *            entryCallSequenceModel
 * @return the Weka instances that hold the data that is used for the clustering
 */
protected Instances createInstances(final List<UserSessionAsCountsOfCalls> countModel,
        final List<String> listOfDistinctOperationSignatures) {

    final int numberOfDistinctOperationSignatures = listOfDistinctOperationSignatures.size();
    final FastVector fvWekaAttributes = new FastVector(numberOfDistinctOperationSignatures);

    for (int i = 0; i < numberOfDistinctOperationSignatures; i++) {
        final String attributeName = "Attribute" + i;
        final Attribute attribute = new Attribute(attributeName);
        fvWekaAttributes.addElement(attribute);
    }

    final Instances clusterSet = new Instances("CallCounts", fvWekaAttributes, countModel.size());

    for (final UserSessionAsCountsOfCalls userSession : countModel) {

        int indexOfAttribute = 0;
        final Instance instance = new Instance(numberOfDistinctOperationSignatures);

        for (int row = 0; row < listOfDistinctOperationSignatures.size(); row++) {
            instance.setValue((Attribute) fvWekaAttributes.elementAt(indexOfAttribute),
                    userSession.getAbsoluteCountOfCalls()[row]);
            indexOfAttribute++;
        }

        clusterSet.add(instance);
    }

    return clusterSet;
}
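
The returned Instances can then be passed to any Weka clusterer, for example (a minimal sketch of a follow-up step, not part of the source; the cluster count is an assumption):

SimpleKMeans kmeans = new SimpleKMeans();
kmeans.setNumClusters(3); // hypothetical choice of k
kmeans.buildClusterer(clusterSet);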

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
public void buildClassifier(ContourDataGrid cData, VectorDataList bgData) throws Exception {

    // transform input data to weka mi-instances
    m_data = initDataset(cData.numFeatures(), 2, cData.totalLength() + bgData.numVectors(), cData.width());

    for (int r = 0; r < cData.totalLength(); r++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        for (int c = 0; c < cData.width(); c++) {
            int vecIdx = cData.getVectorIdx(c, r);
            Instance inst = new DenseInstance(cData.weight(vecIdx), cData.getVector(vecIdx));
            inst.setDataset(bagData);
            bagData.add(inst);
        }
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, r); // bag id
        newBag.setValue(2, 1); // class attribute
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }

    for (int i = 0; i < bgData.numVectors(); i++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        Instance inst = new DenseInstance(bgData.weight(i), bgData.getVector(i));
        inst.setDataset(bagData);
        bagData.add(inst);
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, cData.totalLength() + i); // bag id
        newBag.setValue(2, 0); // class attribute (background)
        newBag.setValue(1, value); // index of the bag's relation
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }

    m_classifier.buildClassifier(m_data);
}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
public double contourProbability(double[] inst) throws Exception {
    Instances bagData = new Instances(m_data.attribute(1).relation(), 1);
    Instance i = new DenseInstance(1, inst);
    i.setDataset(bagData);

    bagData.add(i);

    Instance bag = new DenseInstance(3);
    bag.setDataset(m_data);
    // Register the single-instance bag as a new relation of the relational
    // attribute and store its index in the bag instance.
    int val = bag.attribute(1).addRelation(bagData);
    bag.setValue(1, val);

    return m_classifier.distributionForInstance(bag)[1];

}