Example usage for weka.core Instances add

Introduction

In this page you can find the example usage for weka.core Instances add.

Prototype

@Override
public boolean add(Instance instance)

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:DocClassifier.java

public Instances createInstances(File[] files) {
    Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length);
    for (File file : files) {
        Instance inst = createInstance(file);
        inst.setDataset(instances);/*from   ww  w .ja  va2 s. c om*/
        instances.add(inst);
        instances.setClass((Attribute) attrList.lastElement());
    }
    return instances;
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Gets the transformed training data.//from  w  w  w .  j  av a  2  s . c  o m
 *
 * @return the transformed training data
 * @throws Exception if transformed data can't be returned
 */
@Override
public Instances transformedData(Instances data) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("Principal components hasn't been built yet");
    }

    Instances output = null;

    if (m_transBackToOriginal) {
        output = new Instances(m_originalSpaceFormat);
    } else {
        output = new Instances(m_transformedFormat);
    }
    for (int i = 0; i < data.numInstances(); i++) {
        Instance converted = convertInstance(data.instance(i));
        output.add(converted);
    }

    return output;
}

From source file:TextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set.//from www  . j  a  v  a2  s. com
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.addElement(enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = null;
                if (m_OutputFilename)
                    newInst = new double[3];
                else
                    newInst = new double[2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                BufferedReader is;
                if (m_charSet == null || m_charSet.length() == 0) {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt)));
                } else {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet));
                }

                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
                        + File.separator + files[j]);
            }
        }
    }

    return data;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set./* w  w  w .  j a  v  a  2  s.  c o  m*/
 *
 * @return the structure of the data set as an empty set of Instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
        getStructure();
    }

    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    m_cumulativeInstances = new FastVector();
    FastVector current;

    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);

        m_cumulativeInstances.addElement(current);
    }

    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    //     if (ob instanceof Double) {
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                    //     }
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory

    return dataSet;
}

From source file:Pair.java

License:Open Source License

private void doCV(Instances targetData) throws Exception {
    System.out.println();/*from w ww .ja  v  a2s . c  o  m*/
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { //weight all equally
        weightSource = weightTarget = 1.0/*/numInstances*/;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
        weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {

            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {

        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {

            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();

            buildClassifierWithWeights(data);

            System.out.println("Iteration " + it + ":" + getTestError());

            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);

        }

    }

}

From source file:ArffLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined by a
 * call to getStructure then method should do so before processing the rest of
 * the data set./*from   ww  w .  j av  a2 s .co  m*/
 * 
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */

public Instances getDataSet() throws IOException {

    Instances insts = null;
    try {
        if (m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }

        if (m_structure == null) {
            getStructure();
        }

        // Read all instances
        Instance inst;
        insts = new Instances(m_structure, 0);
        while ((inst = m_ArffReader.readInstance(m_structure)) != null) {
            insts.add(inst);
        }

        // Instances readIn = new Instances(m_structure);
    } finally {
        if (m_sourceReader != null) {
            // close the stream
            m_sourceReader.close();
        }
    }

    return insts;
}

From source file:TextDirectoryToArff.java

License:Open Source License

public Instances createDataset(String directoryPath) throws Exception {

    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("contents", (FastVector) null));
    Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);

    File dir = new File(directoryPath);
    String[] files = dir.list();/*from ww w . jav a 2s  .  c om*/
    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt")) {
            try {
                double[] newInst = new double[2];
                newInst[0] = (double) data.attribute(0).addStringValue(files[i]);
                File txt = new File(directoryPath + File.separator + files[i]);
                InputStreamReader is;
                is = new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }
                newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                //System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    return data;
}

From source file:SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.//from   www  .ja v  a 2  s .  c o m
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper 
         is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}

From source file:MPCKMeans.java

License:Open Source License

/**
 * Generates a clusterer. Instances in data have to be
 * either all sparse or all non-sparse/*from www . jav  a  2  s.com*/
 *
 * @param data set of instances serving as training data 
 * @exception Exception if the clusterer has not been 
 * generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    System.out.println("ML weight=" + m_MLweight);
    System.out.println("CL weight= " + m_CLweight);
    System.out.println("LOG term weight=" + m_logTermWeight);
    System.out.println("Regularizer weight= " + m_regularizerTermWeight);
    m_RandomNumberGenerator = new Random(m_RandomSeed);

    if (m_metric instanceof OfflineLearnableMetric) {
        m_isOfflineMetric = true;
    } else {
        m_isOfflineMetric = false;
    }

    // Don't rebuild the metric if it was already trained
    if (!m_metricBuilt) {
        m_metric.buildMetric(data);
        m_metricBuilt = true;
        m_metricLearner.setMetric(m_metric);
        m_metricLearner.setClusterer(this);

        m_metrics = new LearnableMetric[m_NumClusters];
        m_metricLearners = new MPCKMeansMetricLearner[m_NumClusters];
        for (int i = 0; i < m_metrics.length; i++) {
            if (m_useMultipleMetrics) {
                m_metrics[i] = (LearnableMetric) m_metric.clone();
                m_metricLearners[i] = (MPCKMeansMetricLearner) m_metricLearner.clone();
                m_metricLearners[i].setMetric(m_metrics[i]);
                m_metricLearners[i].setClusterer(this);
            } else {
                m_metrics[i] = m_metric;
                m_metricLearners[i] = m_metricLearner;
            }
        }
    }

    setInstances(data);
    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);
    m_ClusterAssignments = new int[m_Instances.numInstances()];

    if (m_Instances.checkForNominalAttributes() && m_Instances.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle nominal attributes\n");
    }

    m_ClusterCentroids = m_Initializer.initialize();

    // if all instances are smoothed by the metric, the centroids
    // need to be smoothed too (note that this is independent of
    // centroid smoothing performed by K-Means)
    if (m_metric instanceof InstanceConverter) {
        System.out.println("Converting centroids...");
        Instances convertedCentroids = new Instances(m_ClusterCentroids, m_NumClusters);
        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance centroid = m_ClusterCentroids.instance(i);
            convertedCentroids.add(((InstanceConverter) m_metric).convertInstance(centroid));
        }

        m_ClusterCentroids.delete();
        for (int i = 0; i < convertedCentroids.numInstances(); i++) {
            m_ClusterCentroids.add(convertedCentroids.instance(i));
        }
    }

    System.out.println("Done initializing clustering ...");
    getIndexClusters();

    if (m_verbose && m_Seedable) {
        printIndexClusters();
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i + ": " + m_ClusterCentroids.instance(i));
        }
    }

    // Some extra work for smoothing metrics
    if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) {

        SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric;
        Instances smoothedCentroids = new Instances(m_Instances, m_NumClusters);

        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance smoothedCentroid = smoothingMetric.smoothInstance(m_ClusterCentroids.instance(i));
            smoothedCentroids.add(smoothedCentroid);
        }
        m_ClusterCentroids = smoothedCentroids;

        updateSmoothingMetrics();
    }

    runKMeans();
}

From source file:PCADetector.java

License:Apache License

public Instances getInstances() {
    int numAtts = m_oriDataMatrix.size();
    if (numAtts < 0)
        return null;
    ArrayList<Attribute> atts = new ArrayList<Attribute>(numAtts);
    for (int att = 0; att < numAtts; att++) {
        atts.add(new Attribute(Integer.toString(att), att));
    }/*from w w  w. j a v a2s .c  om*/
    int numInstances = m_oriDataMatrix.get(0).size();
    if (numInstances <= 0)
        return null;
    Instances dataset = new Instances("MetricInstances", atts, numInstances);
    for (int inst = 0; inst < numInstances; inst++) {
        Instance newInst = new DenseInstance(numAtts);
        for (int att = 0; att < numAtts; att++) {
            newInst.setValue(att, m_oriDataMatrix.get(att).get(inst));
        }
        dataset.add(newInst);
    }
    return dataset;
}