Example usage for weka.core Instances attribute

Introduction

On this page you can find example usages of the attribute method of weka.core.Instances.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:ID3Chi.java

License:Open Source License

/**
 * Builds the ID3Chi (sub)tree rooted at this node from the given data.
 * <p>
 * The attribute with the highest information gain is selected. The node
 * becomes a leaf when that gain is zero, or when the chi-square check is
 * applicable to the split and the statistic does not exceed the critical
 * value for the configured confidence level. Otherwise one successor is
 * created per value of the selected attribute.
 *
 * @param data
 *            the training data
 * @exception Exception
 *                if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // An empty node only records the null distribution. Processing then
    // continues below; there is no early return here.
    if (data.numInstances() == 0) {
        SetNullDistribution(data);
    }

    // Information gain per enumerated attribute, stored at the attribute's index.
    double[] gains = new double[data.numAttributes()];
    Enumeration candidates = data.enumerateAttributes();
    double baseEntropy = computeEntropy(data);
    while (candidates.hasMoreElements()) {
        Attribute candidate = (Attribute) candidates.nextElement();
        gains[candidate.index()] = computeInfoGain(data, candidate, baseEntropy);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gains));

    // Chi-square statistic of the chosen attribute and the critical value
    // for (number of attribute values - 1) degrees of freedom.
    double chiSquare = computeChiSquare(data, m_Attribute);
    ChiSquaredDistribution distribution = new ChiSquaredDistribution(m_Attribute.numValues() - 1);
    double threshold = distribution.inverseCumulativeProbability(m_confidenceLevel);

    // Zero information gain: nothing to split on.
    if (Utils.eq(gains[m_Attribute.index()], 0)) {
        MakeALeaf(data);
        return;
    }

    // Instances with unknown values for the selected attribute are NOT
    // discarded before splitting.
    Instances[] partitions = splitData(data, m_Attribute);

    // The split does not pass the chi-square check: stop growing here.
    if (CheckIfCanApplyChiSquare(partitions) && (chiSquare <= threshold)) {
        MakeALeaf(data);
        return;
    }

    // One successor per attribute value, each remembering its share of the data.
    int branchCount = m_Attribute.numValues();
    m_Successors = new ID3Chi[branchCount];
    for (int branch = 0; branch < branchCount; branch++) {
        ID3Chi child = new ID3Chi(this.m_confidenceLevel);
        m_Successors[branch] = child;
        child.m_Ratio = (double) partitions[branch].numInstances() / (double) data.numInstances();
        child.makeTree(partitions[branch]);
    }
}

From source file:aaa.util.test.CreateArff.java

License:Open Source License

/**
 * Generates the Instances object and outputs it in ARFF format to stdout.
 * The dataset holds one attribute of each kind (numeric, nominal, string,
 * date, relational) and two rows.
 *
 * @param args   ignored
 * @throws Exception   if generation of instances fails
 */
public static void main(String[] args) throws Exception {
    // 1. set up attributes
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    // - numeric
    attributes.add(new Attribute("att1"));
    // - nominal
    ArrayList<String> labels = new ArrayList<String>();
    for (int n = 1; n <= 5; n++) {
        labels.add("val" + n);
    }
    attributes.add(new Attribute("att2", labels));
    // - string
    attributes.add(new Attribute("att3", (ArrayList<String>) null));
    // - date
    attributes.add(new Attribute("att4", "yyyy-MM-dd"));
    // - relational (its own header: one numeric and one nominal attribute)
    ArrayList<Attribute> nestedAttributes = new ArrayList<Attribute>();
    nestedAttributes.add(new Attribute("att5.1"));
    ArrayList<String> nestedLabels = new ArrayList<String>();
    for (int n = 1; n <= 5; n++) {
        nestedLabels.add("val5." + n);
    }
    nestedAttributes.add(new Attribute("att5.2", nestedLabels));
    Instances nested = new Instances("att5", nestedAttributes, 0);
    attributes.add(new Attribute("att5", nested, 0));

    // 2. create Instances object
    Instances dataset = new Instances("MyRelation", attributes, 0);

    // 3. fill with data
    // first instance: numeric, nominal, string, date
    double[] row = new double[dataset.numAttributes()];
    row[0] = Math.PI;
    row[1] = labels.indexOf("val3");
    row[2] = dataset.attribute(2).addStringValue("This is a string!");
    row[3] = dataset.attribute(3).parseDate("2001-11-09");
    // - relational value with two nested instances
    nested = new Instances(dataset.attribute(4).relation(), 0);
    nested.add(new DenseInstance(1.0, new double[] { Math.PI + 1, nestedLabels.indexOf("val5.3") }));
    nested.add(new DenseInstance(1.0, new double[] { Math.PI + 2, nestedLabels.indexOf("val5.2") }));
    row[4] = dataset.attribute(4).addRelation(nested);
    dataset.add(new DenseInstance(1.0, row));

    // second instance: a NEW value array is required for every row
    row = new double[dataset.numAttributes()];
    row[0] = Math.E;
    row[1] = labels.indexOf("val1");
    row[2] = dataset.attribute(2).addStringValue("And another one!");
    row[3] = dataset.attribute(3).parseDate("2000-12-01");
    // - relational value with two nested instances
    nested = new Instances(dataset.attribute(4).relation(), 0);
    nested.add(new DenseInstance(1.0, new double[] { Math.E + 1, nestedLabels.indexOf("val5.4") }));
    nested.add(new DenseInstance(1.0, new double[] { Math.E + 2, nestedLabels.indexOf("val5.1") }));
    row[4] = dataset.attribute(4).addRelation(nested);
    dataset.add(new DenseInstance(1.0, row));

    // 4. output data
    System.out.println(dataset);
}

From source file:adams.data.conversion.SpreadSheetToWekaInstances.java

License:Open Source License

/**
 * Performs the actual conversion./*from  ww  w.j a v  a  2  s  .  co m*/
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    Instances result;
    SpreadSheet sheet;
    DenseInstance inst;
    ArrayList<Attribute> atts;
    HashSet<String> unique;
    ArrayList<String> labels;
    Row row;
    Cell cell;
    int i;
    int n;
    double[] values;
    Collection<ContentType> types;
    ContentType type;
    boolean added;
    int[] classIndices;

    sheet = (SpreadSheet) m_Input;

    // create header
    atts = new ArrayList<>();
    for (i = 0; i < sheet.getColumnCount(); i++) {
        added = false;
        types = sheet.getContentTypes(i);
        if (types.contains(ContentType.DOUBLE))
            types.remove(ContentType.LONG);
        if (types.contains(ContentType.LONG)) {
            types.add(ContentType.DOUBLE);
            types.remove(ContentType.LONG);
        }

        if (types.size() == 1) {
            type = (ContentType) types.toArray()[0];
            if (type == ContentType.DOUBLE) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent()));
                added = true;
            } else if (type == ContentType.DATE) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(),
                        Constants.TIMESTAMP_FORMAT));
                added = true;
            } else if (type == ContentType.TIME) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), Constants.TIME_FORMAT));
                added = true;
            }
        }

        if (!added) {
            unique = new HashSet<>();
            for (n = 0; n < sheet.getRowCount(); n++) {
                row = sheet.getRow(n);
                cell = row.getCell(i);
                if ((cell != null) && !cell.isMissing())
                    unique.add(cell.getContent());
            }
            if ((unique.size() > m_MaxLabels) || (m_MaxLabels < 1)) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), (FastVector) null));
            } else {
                labels = new ArrayList<>(unique);
                Collections.sort(labels);
                atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), labels));
            }
        }
    }
    result = new Instances(Environment.getInstance().getProject(), atts, sheet.getRowCount());
    if (sheet.hasName())
        result.setRelationName(sheet.getName());

    // add data
    for (n = 0; n < sheet.getRowCount(); n++) {
        row = sheet.getRow(n);
        values = new double[result.numAttributes()];
        for (i = 0; i < result.numAttributes(); i++) {
            cell = row.getCell(i);
            values[i] = weka.core.Utils.missingValue();
            if ((cell != null) && !cell.isMissing()) {
                if (result.attribute(i).type() == Attribute.DATE) {
                    if (cell.isTime())
                        values[i] = cell.toTime().getTime();
                    else
                        values[i] = cell.toDate().getTime();
                } else if (result.attribute(i).isNumeric()) {
                    values[i] = Utils.toDouble(cell.getContent());
                } else if (result.attribute(i).isString()) {
                    values[i] = result.attribute(i).addStringValue(cell.getContent());
                } else {
                    values[i] = result.attribute(i).indexOfValue(cell.getContent());
                }
            }
        }
        inst = new DenseInstance(1.0, values);
        result.add(inst);
    }

    if (sheet instanceof Dataset) {
        classIndices = ((Dataset) sheet).getClassAttributeIndices();
        if (classIndices.length > 0)
            result.setClassIndex(classIndices[0]);
    }

    return result;
}

From source file:adams.data.conversion.WekaInstancesToSpreadSheet.java

License:Open Source License

/**
 * Performs the actual conversion./* ww  w  . j a  v a  2  s. c  o  m*/
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    SpreadSheet result;
    Instances data;
    Row row;
    int i;
    int n;
    String str;

    data = (Instances) m_Input;

    // special case for InstancesViews
    if (m_SpreadSheetType instanceof InstancesView) {
        result = new InstancesView((Instances) m_Input);
        return result;
    }

    // create header
    result = m_SpreadSheetType.newInstance();
    result.setDataRowClass(m_DataRowType.getClass());
    row = result.getHeaderRow();
    for (n = 0; n < data.numAttributes(); n++)
        row.addCell("" + n).setContent(data.attribute(n).name());
    if (result instanceof Dataset) {
        if (data.classIndex() != -1)
            ((Dataset) result).setClassAttribute(data.classIndex(), true);
    }

    // fill spreadsheet
    for (i = 0; i < data.numInstances(); i++) {
        row = result.addRow("" + i);

        for (n = 0; n < data.numAttributes(); n++) {
            if (data.instance(i).isMissing(n))
                continue;
            if (data.attribute(n).type() == Attribute.DATE) {
                row.addCell("" + n).setContent(new DateTimeMsec(new Date((long) data.instance(i).value(n))));
            } else if (data.attribute(n).type() == Attribute.NUMERIC) {
                row.addCell("" + n).setContent(data.instance(i).value(n));
            } else {
                str = data.instance(i).stringValue(n);
                if (str.equals(SpreadSheet.MISSING_VALUE))
                    row.addCell("" + n).setContentAsString("'" + str + "'");
                else
                    row.addCell("" + n).setContentAsString(str);
            }
        }
    }

    return result;
}

From source file:adams.data.conversion.WekaInstancesToTimeseries.java

License:Open Source License

/**
 * Performs the actual conversion./*from   w  ww  .j  ava2 s .c o  m*/
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    Timeseries result;
    Instances input;
    Instance inst;
    int indexDate;
    int indexValue;
    TimeseriesPoint point;
    int i;
    Date timestamp;
    double value;

    input = (Instances) m_Input;

    // determine attribute indices
    m_DateAttribute.setData(input);
    indexDate = m_DateAttribute.getIntIndex();
    if (indexDate == -1)
        throw new IllegalStateException("Failed to located date attribute: " + m_DateAttribute.getIndex());
    m_ValueAttribute.setData(input);
    indexValue = m_ValueAttribute.getIntIndex();
    if (indexValue == -1)
        throw new IllegalStateException("Failed to located value attribute: " + m_ValueAttribute.getIndex());

    result = new Timeseries(input.relationName() + "-" + input.attribute(indexValue).name());
    for (i = 0; i < input.numInstances(); i++) {
        inst = input.instance(i);
        if (!inst.isMissing(indexDate) && !inst.isMissing(indexValue)) {
            timestamp = new Date((long) inst.value(indexDate));
            value = inst.value(indexValue);
            point = new TimeseriesPoint(timestamp, value);
            result.add(point);
        }
    }

    return result;
}

From source file:adams.data.instances.InstanceComparator.java

License:Open Source License

/**
 * Compares its two arguments for order.  Returns a negative integer,
 * zero, or a positive integer as the first argument is less than, equal
 * to, or greater than the second.
 * <p>
 * The configured indices are checked in order and the first one that
 * differs decides the outcome. A missing value sorts before a present
 * one; numeric attributes are compared by value, all others by their
 * string value. The per-index outcome is reduced to -1/0/+1, negated for
 * descending order, and then multiplied by a power-of-ten weight that
 * depends on the position of the index.
 *
 * @param o1 the first object to be compared.
 * @param o2 the second object to be compared.
 * @return a negative integer, zero, or a positive integer as the
 *           first argument is less than, equal to, or greater than the
 *          second.
 */
@Override
public int compare(Instance o1, Instance o2) {
    int result;
    Instances header;
    int i;
    int index;
    int weight;
    double d1;
    double d2;

    result = 0;
    header = o1.dataset();
    i = 0;
    while ((result == 0) && (i < m_Indices.length)) {
        index = m_Indices[i];
        if (o1.isMissing(index) && o2.isMissing(index))
            result = 0;
        else if (o1.isMissing(index))
            result = -1;
        else if (o2.isMissing(index))
            result = +1;
        else if (header.attribute(index).isNumeric()) {
            d1 = o1.value(index);
            d2 = o2.value(index);
            if (d1 < d2)
                result = -1;
            else if (d1 == d2)
                result = 0;
            else
                result = +1;
        } else {
            // String.compareTo may return an arbitrarily large difference;
            // keep only the sign so the weighting below cannot overflow
            // int and flip the ordering.
            result = Integer.signum(o1.stringValue(index).compareTo(o2.stringValue(index)));
        }

        if (!m_Ascending[i])
            result = -result;

        // add weight to index; result is -1, 0 or +1 here and the cast
        // saturates at Integer.MAX_VALUE, so the product always fits
        weight = (int) Math.pow(10, (m_Indices.length - i));
        result *= weight;

        i++;
    }

    return result;
}

From source file:adams.data.instancesanalysis.PCA.java

License:Open Source License

/**
 * Create a spreadsheet to output from the coefficients 2D array
 *
 * @param data   the underlying dataset/* w ww.j  ava2 s.  c o m*/
 * @param coeff    The coefficients from the principal components analysis
 * @return      A spreadsheet containing the components
 */
protected SpreadSheet extractLoadings(Instances data, ArrayList<ArrayList<Double>> coeff) {
    SpreadSheet result;
    Row row;
    int i;
    int n;

    result = new DefaultSpreadSheet();
    row = result.getHeaderRow();
    for (i = 0; i < coeff.size(); i++)
        row.addCell("L" + (i + 1)).setContent("Loading-" + (i + 1));
    row.addCell("I").setContent("Index");
    row.addCell("A").setContent("Attribute");

    // add the index/attribute name column
    for (n = 0; n < m_NumAttributes; n++) {
        row = result.addRow();
        row.addCell("I").setContent(n + 1);
        row.addCell("A").setContent(data.attribute(n).name());
    }

    //each arraylist is a single column
    for (i = 0; i < coeff.size(); i++) {
        for (n = 0; n < m_NumAttributes; n++) {
            row = result.getRow(n);

            //attribute was kept earlier
            if (m_Kept.contains(n)) {
                int index = m_Kept.indexOf(n);
                if (index < coeff.get(i).size()) {
                    double value = coeff.get(i).get(index);
                    row.addCell("L" + (i + 1)).setContent(value);
                } else {
                    row.addCell("L" + (i + 1)).setContent(0);
                }
            }
            //attribute wasn't kept, coefficient is 0
            else {
                row.addCell("L" + (i + 1)).setContent(0);
            }
        }
    }

    return result;
}

From source file:adams.data.instancesanalysis.PCA.java

License:Open Source License

/**
 * Performs the actual analysis.
 * <p>
 * Steps: optionally restrict the data to the configured attribute range,
 * split the attributes into supported and unsupported ones, run the
 * principal components filter (wrapped in a partitioned filter when
 * unsupported attributes exist), and store the transformed data in
 * {@code m_Scores} and the coefficients in {@code m_Loadings}.
 *
 * @param data   the data to analyze
 * @return      null if successful, otherwise error message
 * @throws Exception   if analysis fails
 */
@Override
protected String doAnalyze(Instances data) throws Exception {
    String result;
    Remove remove;
    PublicPrincipalComponents pca;
    int i;
    Capabilities caps;
    PartitionedMultiFilter2 part;
    Range rangeUnsupported;
    Range rangeSupported;
    TIntList listNominal;
    Range rangeNominal;
    ArrayList<ArrayList<Double>> coeff;
    Instances filtered;
    SpreadSheet transformed;
    WekaInstancesToSpreadSheet conv;
    String colName;

    // reset the outcome of any previous run
    result = null;
    m_Loadings = null;
    m_Scores = null;

    // keep only the attributes in the configured range (inverted Remove filter)
    if (!m_AttributeRange.isAllRange()) {
        if (isLoggingEnabled())
            getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange());
        remove = new Remove();
        remove.setAttributeIndicesArray(m_AttributeRange.getIntIndices());
        remove.setInvertSelection(true);
        remove.setInputFormat(data);
        data = Filter.useFilter(data, remove);
    }
    if (isLoggingEnabled())
        getLogger().info("Performing PCA...");

    // collect the non-class nominal attributes, but only if they are to be skipped
    listNominal = new TIntArrayList();
    if (m_SkipNominal) {
        for (i = 0; i < data.numAttributes(); i++) {
            if (i == data.classIndex())
                continue;
            if (data.attribute(i).isNominal())
                listNominal.add(i);
        }
    }

    // check for unsupported attributes
    // (failing the capabilities test, being the class attribute, or being a
    // skipped nominal attribute all count as unsupported)
    caps = new PublicPrincipalComponents().getCapabilities();
    m_Supported = new TIntArrayList();
    m_Unsupported = new TIntArrayList();
    for (i = 0; i < data.numAttributes(); i++) {
        if (!caps.test(data.attribute(i)) || (i == data.classIndex()) || (listNominal.contains(i)))
            m_Unsupported.add(i);
        else
            m_Supported.add(i);
    }
    // NOTE(review): this changes the class index on the dataset passed in
    // when no range filtering produced a new object above - confirm callers
    // are fine with that.
    data.setClassIndex(-1);

    m_NumAttributes = m_Supported.size();

    // the principal components will delete the attributes without any distinct values.
    // this checks which attributes will be kept.
    m_Kept = new ArrayList<>();
    for (i = 0; i < m_Supported.size(); i++) {
        if (data.numDistinctValues(m_Supported.get(i)) > 1)
            m_Kept.add(m_Supported.get(i));
    }

    // build a model using the PublicPrincipalComponents
    pca = new PublicPrincipalComponents();
    pca.setMaximumAttributes(m_MaxAttributes);
    pca.setVarianceCovered(m_Variance);
    pca.setMaximumAttributeNames(m_MaxAttributeNames);
    // with unsupported attributes present, PCA is applied to the supported
    // range only, while AllFilter is applied to the unsupported range
    part = null;
    if (m_Unsupported.size() > 0) {
        rangeUnsupported = new Range();
        rangeUnsupported.setMax(data.numAttributes());
        rangeUnsupported.setIndices(m_Unsupported.toArray());
        rangeSupported = new Range();
        rangeSupported.setMax(data.numAttributes());
        rangeSupported.setIndices(m_Supported.toArray());
        part = new PartitionedMultiFilter2();
        part.setFilters(new Filter[] { pca, new AllFilter(), });
        part.setRanges(new weka.core.Range[] { new weka.core.Range(rangeSupported.getRange()),
                new weka.core.Range(rangeUnsupported.getRange()), });
    }
    // initialize whichever filter is the outermost one
    try {
        if (part != null)
            part.setInputFormat(data);
        else
            pca.setInputFormat(data);
    } catch (Exception e) {
        result = Utils.handleException(this, "Failed to set data format", e);
    }

    // apply the filter and convert the filtered data into a spreadsheet
    transformed = null;
    if (result == null) {
        try {
            if (part != null)
                filtered = weka.filters.Filter.useFilter(data, part);
            else
                filtered = weka.filters.Filter.useFilter(data, pca);
        } catch (Exception e) {
            result = Utils.handleException(this, "Failed to apply filter", e);
            filtered = null;
        }
        if (filtered != null) {
            conv = new WekaInstancesToSpreadSheet();
            conv.setInput(filtered);
            // a null return value is treated as a successful conversion
            result = conv.convert();
            if (result == null) {
                transformed = (SpreadSheet) conv.getOutput();
                // shorten column names again
                // (strips a leading "filtered-<digits>-" from each column name)
                if (part != null) {
                    for (i = 0; i < transformed.getColumnCount(); i++) {
                        colName = transformed.getColumnName(i);
                        colName = colName.replaceFirst("filtered-[0-9]*-", "");
                        transformed.getHeaderRow().getCell(i).setContentAsString(colName);
                    }
                }
            }
        }
    }

    // only publish scores and loadings if every step succeeded
    if (result == null) {
        // get the coefficients from the filter
        m_Scores = transformed;
        coeff = pca.getCoefficients();
        m_Loadings = extractLoadings(data, coeff);
        m_Loadings.setName("Loadings for " + data.relationName());
    }

    return result;
}

From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java

License:Open Source License

/**
 * Determines the output format based on the input format and returns this.
 * <p>
 * The output header consists of one numeric attribute per component
 * (named "&lt;simple class name&gt;_&lt;k&gt;"), followed by one numeric
 * attribute per collected class attribute name; the last attribute is set
 * as class. The indices of the collected class attributes are stored in
 * {@code m_ClassAttributeIndices} and the header in {@code m_OutputFormat}.
 *
 * @param input    the input format to base the output format on
 * @return       the output format
 * @throws Exception    in case the determination goes wrong
 */
@Override
public Instances determineOutputFormat(Instances input) throws Exception {
    // collect classes: every attribute whose name matches the configured expression
    m_ClassAttributeIndices = new TIntArrayList();
    List<String> classNames = new ArrayList<>();
    for (int pos = 0; pos < input.numAttributes(); pos++) {
        String attName = input.attribute(pos).name();
        if (m_ClassAttributes.isMatch(attName)) {
            classNames.add(attName);
            m_ClassAttributeIndices.add(pos);
        }
    }

    // the dataset's own class attribute is always included
    Attribute classAtt = input.classAttribute();
    if (!classNames.contains(classAtt.name())) {
        classNames.add(classAtt.name());
        m_ClassAttributeIndices.add(classAtt.index());
    }

    // generate header: components first, then the class attributes
    String prefix = getClass().getSimpleName();
    ArrayList<Attribute> header = new ArrayList<>();
    for (int component = 1; component <= getNumComponents(); component++) {
        header.add(new Attribute(prefix + "_" + component));
    }
    for (String className : classNames) {
        header.add(new Attribute(className));
    }

    Instances format = new Instances(prefix, header, 0);
    format.setClassIndex(format.numAttributes() - 1);
    m_OutputFormat = format;

    return format;
}

From source file:adams.flow.sink.WekaAttributeSummary.java

License:Open Source License

/**
 * Displays the token (the panel and dialog have already been created at
 * this stage)./*from  w w  w  .  j av  a 2  s  . c  o  m*/
 *
 * @param token   the token to display
 */
@Override
protected void display(Token token) {
    Instances inst;
    int[] indices;
    AttributeVisualizationPanel panel;

    inst = (Instances) token.getPayload();
    m_Range.setData(inst);
    indices = m_Range.getIntIndices();

    clearPanel();

    if (indices.length == 1) {
        m_PanelAtt = new AttributeVisualizationPanel();
        m_PanelAtt.setInstances(inst);
        m_PanelAtt.setAttribute(indices[0]);
        m_Panel.add(m_PanelAtt, BorderLayout.CENTER);
    } else if (indices.length > 1) {
        m_TabbedPane = new BaseTabbedPane();
        m_Panel.add(m_TabbedPane, BorderLayout.CENTER);
        for (int index : indices) {
            panel = new AttributeVisualizationPanel();
            panel.setInstances(inst);
            panel.setAttribute(index);
            m_TabbedPane.addTab(inst.attribute(index).name(), panel);
        }
    }
}