Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

In this page you can find the example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:adams.ml.data.WekaConverter.java

License:Open Source License

/**
 * Converts one ADAMS dataset row into a Weka instance attached to the given
 * dataset.
 * <p>
 * Every value starts out as missing and is only overwritten when the row
 * has a non-missing cell at the same index as the attribute. Numeric cells
 * are stored via {@code toDouble()}, date cells via
 * {@code toAnyDateType().getTime()}, nominal cells as the result of
 * {@code indexOfValue} on the attribute, and string cells as the index
 * returned by {@code addStringValue}.
 *
 * @param data   the dataset to use as template
 * @param row      the row to convert
 * @return      the generated instance, with {@code data} set as its dataset
 * @throws Exception   if conversion fails, e.g. for an unhandled attribute type
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
    double[] values = new double[data.numAttributes()];

    for (int col = 0; col < data.numAttributes(); col++) {
        // default to missing; replaced below only when the row has content
        values[col] = Utils.missingValue();

        if (row.hasCell(col)) {
            Cell content = row.getCell(col);
            if (!content.isMissing()) {
                Attribute attribute = data.attribute(col);
                int type = attribute.type();

                if (type == Attribute.NUMERIC) {
                    values[col] = content.toDouble();
                } else if (type == Attribute.DATE) {
                    values[col] = content.toAnyDateType().getTime();
                } else if (type == Attribute.NOMINAL) {
                    values[col] = attribute.indexOfValue(content.getContent());
                } else if (type == Attribute.STRING) {
                    values[col] = attribute.addStringValue(content.getContent());
                } else {
                    throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(attribute));
                }
            }
        }
    }

    Instance converted = new DenseInstance(1.0, values);
    converted.setDataset(data);
    return converted;
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    // NOTE(review): presumably signals that the run is driven from the Weka
    // GUI rather than the SESAME GUI; the identical assignment is repeated
    // at the very end of this method - confirm whether that one is intended.
    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    // From here on the method operates on its own Instances object, with
    // the class index unset, rather than on the caller's 'data'.
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // Statistics over the full data set (one slot per attribute).
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    // zero-length rows are placeholders; rows of non-numeric attributes are
    // replaced with the real counts in the loop below
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    // updateClusterInfo == false: only the coordinates are computed, the
    // per-cluster arrays are left untouched
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    // Java zero-initialises the array, so every instance starts out
    // "assigned" to cluster 0 for the first convergence comparison.
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    // --- initial centroids: seeded random draw of distinct instances ---
    Random RandomO = new Random(getSeed());
    int instIndex;
    // used as a set: only the keys matter, the mapped value is always null
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    // The selection loop swaps instances in place, so a separate Instances
    // object is used when order preservation is requested.
    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        // draw from positions 0..j
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        // only accept an instance whose hash key has not been seen yet
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        // move the drawn instance to position j, which later draws exclude
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // fewer distinct instances than requested clusters shrinks the count
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    // --- main loop: assign instances, then recompute centroids ---
    int i;
    boolean converged = false;
    int emptyClusterCount;
    // members of each cluster for the current iteration
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        // assume convergence until some instance changes its cluster
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            // the returned value is used as the instance's cluster index
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                // appends the recomputed centroid to m_ClusterCentroids
                moveCentroid(i, tempI[i], true);
            }
        }

        // hard stop once the iteration limit is reached
        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        // Empty clusters are dropped from the cluster count.
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                // final iteration: compact the member sets and the nominal
                // counts so that indices 0..m_NumClusters-1 are all non-empty
                // NOTE(review): m_ClusterMissingCounts is not compacted
                // here - confirm whether that is intentional.
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                // another iteration follows: just resize, contents are
                // rebuilt at the start of the next centroid update
                tempI = new Instances[m_NumClusters];
            }
        }

        // reset the per-cluster accumulators for the next iteration
        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // --- per-cluster summary: sizes and (optionally) standard deviations ---
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    // no standard deviation for non-numeric attributes
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();

    // NOTE(review): same assignment as at the top of the method; if the flag
    // was meant to be reset here, 'true' looks wrong - confirm.
    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Computes the coordinates of a centroid from the instances assigned to
 * its cluster.
 * <p>
 * A coordinate is set to the attribute's mean or mode when the attribute is
 * nominal, or when the configured distance function is one of the ChEBI,
 * GO, GOChEBI or Calculus variants; otherwise it keeps the array's initial
 * value of zero. When {@code updateClusterInfo} is set, the per-cluster
 * missing and nominal counts are refreshed, coordinates dominated by
 * missing values are replaced by the missing-value marker, and the new
 * centroid is appended to {@code m_ClusterCentroids}.
 *
 * @param centroidIndex index of the centroid whose coordinates are computed
 * @param members the instances assigned to the cluster of this centroid
 * @param updateClusterInfo whether the m_Cluster arrays and the centroid
 *          list should be updated
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    final int numAtts = members.numAttributes();
    double[] vals = new double[numAtts];

    // The distance function is the same for every attribute, so its type is
    // tested once instead of once per attribute.
    final boolean meanOrModeForAll = m_DistanceFunction instanceof ChEBIInd
            || m_DistanceFunction instanceof ChEBIDir
            || m_DistanceFunction instanceof GOInd
            || m_DistanceFunction instanceof GODir
            || m_DistanceFunction instanceof GOChEBIInd
            || m_DistanceFunction instanceof GOChEBIDir
            || m_DistanceFunction instanceof CalculusInd
            || m_DistanceFunction instanceof CalculusDir;

    for (int att = 0; att < numAtts; att++) {
        // mean for the listed distance functions, mode for nominal attributes
        if (meanOrModeForAll || members.attribute(att).isNominal()) {
            vals[att] = members.meanOrMode(att);
        }

        if (!updateClusterInfo) {
            continue;
        }

        m_ClusterMissingCounts[centroidIndex][att] = members.attributeStats(att).missingCount;
        m_ClusterNominalCounts[centroidIndex][att] = members.attributeStats(att).nominalCounts;

        final int missing = m_ClusterMissingCounts[centroidIndex][att];
        final boolean undefined;
        if (members.attribute(att).isNominal()) {
            // mode is missing when "missing" outnumbers the most frequent label
            final int[] counts = m_ClusterNominalCounts[centroidIndex][att];
            undefined = missing > counts[Utils.maxIndex(counts)];
        } else {
            // mean is missing when every member lacks a value
            undefined = missing == members.numInstances();
        }
        if (undefined) {
            vals[att] = Instance.missingValue();
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, vals));
    }
    return vals;
}

From source file:affective.core.ArffLexiconEvaluator.java

License:Open Source License

/**
 * Processes  all the dictionary files./*w  w  w .ja  va2 s .  co  m*/
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    Instances lexInstances = new Instances(reader);

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

    List<Attribute> numericAttributes = new ArrayList<Attribute>();
    List<Attribute> nominalAttributes = new ArrayList<Attribute>();

    // checks all numeric and nominal attributes and discards the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {

        if (i != this.lexiconWordIndex.getIndex()) {
            if (lexInstances.attribute(i).isNumeric()) {
                numericAttributes.add(lexInstances.attribute(i));
                // adds the attribute name to the message-level features to be calculated
                this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name());
            }

            else if (lexInstances.attribute(i).isNominal()) {
                nominalAttributes.add(lexInstances.attribute(i));
                // adds the attribute name together with the nominal value to the message-level features to be calculated
                int numValues = lexInstances.attribute(i).numValues();
                for (int j = 0; j < numValues; j++)
                    this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name() + "-"
                            + lexInstances.attribute(i).value(j));

            }

        }

    }

    // Maps all words with their affective scores discarding missing values
    for (Instance inst : lexInstances) {
        if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
            String word = inst.stringValue(this.lexiconWordIndex.getIndex());
            // stems the word
            word = this.m_stemmer.stem(word);

            // map numeric scores
            if (!numericAttributes.isEmpty()) {
                Map<String, Double> wordVals = new HashMap<String, Double>();
                for (Attribute na : numericAttributes) {
                    if (!weka.core.Utils.isMissingValue(inst.value(na)))
                        wordVals.put(na.name(), inst.value(na));
                }
                this.numDict.put(word, wordVals);
            }

            // map nominal associations
            if (!nominalAttributes.isEmpty()) {
                Map<String, String> wordCounts = new HashMap<String, String>();
                for (Attribute no : nominalAttributes) {
                    if (!weka.core.Utils.isMissingValue(inst.value(no))) {
                        wordCounts.put(no.name(), no.value((int) inst.value(no)));
                    }

                    this.nomDict.put(word, wordCounts);

                }

            }

        }

    }

}

From source file:affective.core.ArffLexiconWordLabeller.java

License:Open Source License

/**
 * Loads the ARFF lexicon file and maps every word to its attribute values.
 * <p>
 * All numeric and nominal attributes other than the word attribute are
 * appended to {@code attributes}. Each instance whose word attribute is of
 * type string is stemmed and stored in {@code attValMap} together with the
 * internal (double) value of every collected attribute. Missing values are
 * stored as they are, not filtered out.
 *
 * @throws IOException  if the lexicon file cannot be opened, read or closed
 */
public void processDict() throws IOException {
    // The Instances(Reader) constructor reads the complete ARFF content, so
    // the reader is released as soon as the data has been loaded.
    Instances lexInstances;
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    try {
        lexInstances = new Instances(reader);
    } finally {
        reader.close();
    }

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);
    final int wordIndex = this.lexiconWordIndex.getIndex();

    // collects all numeric and nominal attributes, skipping the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {
        if (i == wordIndex) {
            continue;
        }

        Attribute att = lexInstances.attribute(i);
        if (att.isNumeric() || att.isNominal()) {
            this.attributes.add(att);
        }
    }

    // Maps all words to the values of the collected attributes
    for (Instance inst : lexInstances) {
        if (!inst.attribute(wordIndex).isString()) {
            continue;
        }

        // stems the word
        String word = this.m_stemmer.stem(inst.stringValue(wordIndex));

        // map numeric scores and nominal label indices
        if (!attributes.isEmpty()) {
            Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>();
            for (Attribute na : attributes) {
                wordVals.put(na, inst.value(na));
            }
            this.attValMap.put(word, wordVals);
        }
    }
}

From source file:AnDE.wdAnDEonline.java

License:Open Source License

/**
 * Trains the classifier on the given instances.
 * <p>
 * Instances without a class value are deleted from {@code instances}
 * itself, the per-attribute value counts are recorded, the parameter
 * structure for the configured AnDE order is allocated and every remaining
 * instance is fed through the first parameter-estimation pass.
 *
 * @param instances the training data; modified in place by the removal of
 *          instances with a missing class
 * @throws Exception if the capabilities test rejects the data or a later
 *           step fails
 */
@Override
public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances.deleteWithMissingClass();

    nInstances = instances.numInstances();
    nAttributes = instances.numAttributes() - 1;
    nc = instances.numClasses();

    probs = new double[nc];

    // number of distinct values of each of the first nAttributes attributes
    paramsPerAtt = new int[nAttributes];
    for (int att = 0; att < nAttributes; att++) {
        paramsPerAtt[att] = instances.attribute(att).numValues();
    }

    // The structure name's position in this table is the tuple size
    // (A0DE -> 0, A1DE -> 1, A2DE -> 2); an unrecognised name leaves
    // numTuples untouched.
    final String[] structures = { "A0DE", "A1DE", "A2DE" };
    for (int order = 0; order < structures.length; order++) {
        if (m_S.equalsIgnoreCase(structures[order])) {
            numTuples = order;
            break;
        }
    }

    // ---- parameter learning: initialise the data structures ----
    final int scheme = plTechniques.MAP;

    logDComputer = LogDistributionComputerAnDE.getDistributionComputer(numTuples, scheme);

    dParameters_ = new wdAnDEParametersFlat(nAttributes, nc, nInstances, paramsPerAtt, scheme, numTuples,
            m_MVerb);

    if (m_MVerb) {
        System.out.println("All data structures are initialized. Starting to estimate parameters.");
    }

    // first pass over the training data
    for (int row = 0; row < nInstances; row++) {
        dParameters_.updateFirstPass(instances.instance(row));
    }
}

From source file:app.RunApp.java

License:Open Source License

/**
 * Handles a left mouse click on the attributes table: when the first tab is
 * active, redraws the box diagram for the attribute named in the selected
 * row.
 * <p>
 * The click is ignored when no row is selected, when the selected cell is
 * empty, or when the data set has no attribute with the selected name.
 *
 * @param evt Event
 */
private void tableAttributesLeftMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_tableAttributesLeftMouseClicked
    if (tabsAttributes.getSelectedIndex() != 0) {
        return;
    }

    int selected = tableAttributesLeft.getSelectedRow();
    if (selected < 0) {
        // getSelectedRow() reports -1 when nothing is selected
        return;
    }

    Object cellValue = tableAttributesLeft.getValueAt(selected, 0);
    if (cellValue == null) {
        return;
    }
    String attr = cellValue.toString();

    Instances instances = dataset.getDataSet();

    // lookup by name yields null when the data set has no such attribute
    Attribute currentAttr = instances.attribute(attr);
    if (currentAttr == null) {
        return;
    }

    double[] attributeValues = instances.attributeToDoubleArray(currentAttr.index());

    HeapSort.sort(attributeValues);

    attributesBoxDiagram2.getChart().setTitle(currentAttr.name());

    attributesBoxDiagram2.getChart().getXYPlot().clearAnnotations();

    ChartUtils.updateXYChart(attributesBoxDiagram2, HeapSort.getSortedArray());
}

From source file:app.RunApp.java

License:Open Source License

/**
 * Generates TableModel for attributes/*from  ww w .  j  a  v a 2s .c o  m*/
 * 
 * @param jtable Table
 * @param dataset Multi-label dataset
 * @return Generated TableModel
 */
private TableModel attributesTableModel(JTable jtable, MultiLabelInstances dataset) {
    DefaultTableModel tableModel = new DefaultTableModel() {
        @Override
        public boolean isCellEditable(int row, int column) {
            //This causes all cells to be not editable
            return false;
        }
    };

    tableModel.addColumn("Attribute");

    Object[] row = new Object[1];

    Instances instances = dataset.getDataSet();

    int numLabels = dataset.getNumLabels();

    int numAttributes = instances.numAttributes() - numLabels;

    Attribute att;
    for (int i = 0; i < numAttributes; i++) {
        att = instances.attribute(i);
        if (att.isNumeric()) {
            row[0] = att.name();
            tableModel.addRow(row);
        }
    }

    jtable.setModel(tableModel);

    return jtable.getModel();
}

From source file:arffcreator.arffFrame.java

/**
 * Builds a small demo dataset with one attribute of each kind (numeric,
 * nominal, string, date, relational), fills it with two instances, appends
 * its textual form to the text area and stores it in {@code dataset}.
 *
 * @param evt the triggering action event (not used)
 */
private void createActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_createActionPerformed
    // ---- 1. attribute definitions ----
    FastVector attributes = new FastVector();

    // numeric
    attributes.addElement(new Attribute("att1"));

    // nominal with labels val1 .. val5
    FastVector nominalLabels = new FastVector();
    for (int n = 1; n <= 5; n++) {
        nominalLabels.addElement("val" + n);
    }
    attributes.addElement(new Attribute("att2", nominalLabels));

    // string
    attributes.addElement(new Attribute("att3", (FastVector) null));

    // date
    attributes.addElement(new Attribute("att4", "yyyy-MM-dd"));

    // relational: a nested header with one numeric and one nominal attribute
    FastVector relAttributes = new FastVector();
    relAttributes.addElement(new Attribute("att5.1"));
    FastVector relLabels = new FastVector();
    for (int n = 1; n <= 5; n++) {
        relLabels.addElement("val5." + n);
    }
    relAttributes.addElement(new Attribute("att5.2", relLabels));
    Instances relHeader = new Instances("att5", relAttributes, 0);
    attributes.addElement(new Attribute("att5", relHeader, 0));

    // ---- 2. empty dataset ----
    Instances data = new Instances("MyRelation", attributes, 0);

    // ---- 3. first instance ----
    double[] firstValues = new double[data.numAttributes()];
    firstValues[0] = Math.PI;
    firstValues[1] = nominalLabels.indexOf("val3");
    firstValues[2] = data.attribute(2).addStringValue("This is a string!");
    try {
        firstValues[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // nested relation with two rows
    Instances firstRelation = new Instances(data.attribute(4).relation(), 0);
    double[] firstRelRowA = new double[2];
    firstRelRowA[0] = Math.PI + 1;
    firstRelRowA[1] = relLabels.indexOf("val5.3");
    firstRelation.add(new Instance(1.0, firstRelRowA));
    double[] firstRelRowB = new double[2];
    firstRelRowB[0] = Math.PI + 2;
    firstRelRowB[1] = relLabels.indexOf("val5.2");
    firstRelation.add(new Instance(1.0, firstRelRowB));
    firstValues[4] = data.attribute(4).addRelation(firstRelation);
    data.add(new Instance(1.0, firstValues));

    // ---- 3. second instance (uses its own value array) ----
    double[] secondValues = new double[data.numAttributes()];
    secondValues[0] = Math.E;
    secondValues[1] = nominalLabels.indexOf("val1");
    secondValues[2] = data.attribute(2).addStringValue("And another one!");
    try {
        secondValues[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // nested relation with two rows
    Instances secondRelation = new Instances(data.attribute(4).relation(), 0);
    double[] secondRelRowA = new double[2];
    secondRelRowA[0] = Math.E + 1;
    secondRelRowA[1] = relLabels.indexOf("val5.4");
    secondRelation.add(new Instance(1.0, secondRelRowA));
    double[] secondRelRowB = new double[2];
    secondRelRowB[0] = Math.E + 2;
    secondRelRowB[1] = relLabels.indexOf("val5.1");
    secondRelation.add(new Instance(1.0, secondRelRowB));
    secondValues[4] = data.attribute(4).addRelation(secondRelation);
    data.add(new Instance(1.0, secondValues));

    // ---- 4. output ----
    textArea.append(data.toString());

    dataset = data.toString();

}

From source file:arffGenerator.TextDirectoryToArff.java

License:Open Source License

/**
 * Creates one instance per {@code .txt} file found directly inside the
 * given directory and appends it to {@code data}.
 * <p>
 * Each instance has two values: attribute 0 receives the file's text, with
 * the characters rejected by {@code isFakeChar} dropped, and attribute 1
 * receives the index of {@code clase} among that attribute's values, or NaN
 * when {@code clase} is {@code null}. A file that cannot be read or
 * converted is reported on standard error and skipped; the remaining files
 * are still processed.
 *
 * @param clase - class value assigned to every instance created from this
 *          directory; {@code null} stores NaN instead
 * @param directoryPath - path of the directory that contains the text files
 * @param data - set of instances providing the structure and receiving the
 *          new instances
 */
private void cargarAtrribDeClase(String clase, String directoryPath, Instances data) {
    System.out.println("Se crean las instancias de la clase: " + clase);
    File dir = new File(directoryPath);
    String[] files = dir.list();
    if (files == null) {
        // File.list() returns null when the path is not a directory or an
        // I/O error occurs; there is nothing to convert in that case
        System.err.println("failed to list directory: " + directoryPath);
        return;
    }

    for (int i = 0; i < files.length; i++) {
        if (!files[i].endsWith(".txt")) {
            continue;
        }

        String path = directoryPath + File.separator + files[i];
        try {
            StringBuilder txtStr = new StringBuilder();
            InputStreamReader is = new InputStreamReader(new FileInputStream(new File(path)));
            try {
                int c;
                while ((c = is.read()) != -1) {
                    if (!isFakeChar((char) c)) {
                        txtStr.append((char) c);
                    }
                }
            } finally {
                // release the file handle even when reading fails
                is.close();
            }

            double[] newInst = new double[2];
            newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
            if (clase == null) {
                newInst[1] = Double.NaN;
            } else {
                newInst[1] = (double) data.attribute(1).indexOfValue(clase);
            }
            data.add(new Instance(1.0, newInst));
        } catch (Exception e) {
            // deliberate best effort: report the file and carry on with the rest
            System.err.println("failed to convert file: " + path);
        }
    }
    System.out.println("Se crearon las instancias de la clase: " + clase);
}