Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find example usages of the attribute method of weka.core.Instances.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:adams.flow.transformer.WekaExperimentEvaluation.java

License:Open Source License

/**
 * Builds a configured copy of the testing algorithm for the given data.
 * <p>
 * Deep copies of {@code m_Tester} and {@code m_OutputFormat} are used, so the
 * configured templates are left untouched. The attribute names of the data
 * are matched (lower-cased) against the row/column key names in order to
 * determine the dataset and resultset key columns as well as the run and
 * fold columns.
 *
 * @param data   the experimental data
 * @return      the configured testing algorithm
 * @throws Exception    if copying the tester or result matrix fails, or the
 *          comparison field is not present in the data
 */
protected Tester getTester(Instances data) throws Exception {
    Tester tester = (Tester) Utils.deepCopy(m_Tester);
    ResultMatrix resultMatrix = (ResultMatrix) Utils.deepCopy(m_OutputFormat);
    tester.setInstances(data);
    tester.setSignificanceLevel(m_Significance);
    tester.setShowStdDevs(resultMatrix.getShowStdDev());
    tester.setSortColumn(-1);

    // the names are always determined in the same order, swapping only
    // exchanges the roles they play
    List<String> fromRow = determineColumnNames(m_Row, ExperimenterDefaults.getRow());
    List<String> fromColumn = determineColumnNames(m_Column, ExperimenterDefaults.getColumn());
    List<String> datasetKeys = m_SwapRowsAndColumns ? fromColumn : fromRow;
    List<String> resultsetKeys = m_SwapRowsAndColumns ? fromRow : fromColumn;

    // collect the 1-based column indices as range strings (",1,4,...")
    StringBuilder resultsetCols = new StringBuilder();
    StringBuilder datasetCols = new StringBuilder();
    boolean comparisonSeen = false;
    for (int idx = 0; idx < data.numAttributes(); idx++) {
        String lower = data.attribute(idx).name().toLowerCase();

        if (datasetKeys.contains(lower)) {
            datasetCols.append(",").append(idx + 1);
        } else if (lower.equals("key_run")) {
            tester.setRunColumn(idx);
        } else if (lower.equals("key_fold")) {
            tester.setFoldColumn(idx);
        } else if (resultsetKeys.contains(lower)) {
            resultsetCols.append(",").append(idx + 1);
        } else if (lower.contains(ExperimenterDefaults.getComparisonField())) {
            comparisonSeen = true;
        } else if (lower.contains("root_relative_squared_error") && !comparisonSeen) {
            comparisonSeen = true;
        }
    }

    // resultset key columns
    Range resultsetRange = new Range();
    if (resultsetCols.length() != 0) {
        String spec = resultsetCols.toString();
        try {
            resultsetRange.setRanges(spec);
        } catch (Exception ex) {
            handleException("Failed to set ranges: " + spec, ex);
        }
    }
    tester.setResultsetKeyColumns(resultsetRange);

    // dataset key columns
    Range datasetRange = new Range();
    if (datasetCols.length() != 0) {
        String spec = datasetCols.toString();
        try {
            datasetRange.setRanges(spec);
        } catch (Exception ex) {
            handleException("Failed to set dataset ranges: " + spec, ex);
        }
    }
    tester.setDatasetKeyColumns(datasetRange);

    // the comparison field must exist in the data
    String fieldName = m_ComparisonField.getField();
    if (data.attribute(fieldName) == null)
        throw new Exception("Cannot find comparison field '" + fieldName + "' in data!");

    tester.setDisplayedResultsets(null); // all
    tester.setResultMatrix(resultMatrix);

    return tester;
}

From source file:adams.flow.transformer.WekaExperimentEvaluation.java

License:Open Source License

/**
 * Evaluates the experiment data and stores the textual outcome in the
 * output token.
 * <p>
 * Exceptions raised while generating the comparison itself are not
 * propagated; their message is appended to the generated output instead.
 *
 * @param data   the data to evaluate
 * @throws Exception    if setting up the tester fails or the comparison
 *          field cannot be found in the data
 */
protected void evaluateExperiment(Instances data) throws Exception {
    // setup testing algorithm
    Tester tester = getTester(data);

    // locate the column to compare on
    String fieldName = m_ComparisonField.getField();
    weka.core.Attribute comparison = data.attribute(fieldName);
    if (comparison == null)
        throw new Exception("Cannot find comparison field '" + fieldName + "' in data!");
    int column = comparison.index();
    int base = m_TestBase;

    StringBuilder report = new StringBuilder();
    if (m_OutputHeader)
        report.append(tester.header(column)).append("\n");

    try {
        // base < #resultsets: full comparison against that base,
        // base == #resultsets: summary, anything larger: ranking
        String section;
        if (base < tester.getNumResultsets())
            section = tester.multiResultsetFull(base, column);
        else if (base == tester.getNumResultsets())
            section = tester.multiResultsetSummary(column);
        else
            section = tester.multiResultsetRanking(column);
        report.append(section).append("\n");
    } catch (Exception ex) {
        report.append(ex.getMessage()).append("\n");
    }

    // broadcast evaluation
    m_OutputToken = new Token(report.toString());
}

From source file:adams.flow.transformer.WekaExtractArray.java

License:Open Source License

/**
 * Extracts a single row or column from the incoming dataset or spreadsheet
 * and forwards it as a {@code Double[]} array.
 * <p>
 * Missing spreadsheet cells leave the corresponding array element
 * {@code null}.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String error = null;
    Double[] values = null;
    boolean byColumn = (m_Type == ExtractionType.COLUMN);
    Object payload = m_InputToken.getPayload();

    if (payload instanceof Instances) {
        Instances dataset = (Instances) payload;

        // the upper bound of the index depends on what gets extracted
        m_Index.setMax(byColumn ? dataset.numAttributes() : dataset.numInstances());
        int pos = m_Index.getIntIndex();

        if (pos == -1) {
            error = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
        } else if (byColumn && !dataset.attribute(pos).isNumeric()) {
            error = "Column " + m_Index + " is not numeric!";
        } else if (byColumn) {
            values = new Double[dataset.numInstances()];
            for (int r = 0; r < values.length; r++)
                values[r] = dataset.instance(r).value(pos);
        } else {
            values = new Double[dataset.numAttributes()];
            for (int c = 0; c < values.length; c++)
                values[c] = dataset.instance(pos).value(c);
        }
    } else {
        SpreadSheet table = (SpreadSheet) payload;

        m_Index.setMax(byColumn ? table.getColumnCount() : table.getRowCount());
        int pos = m_Index.getIntIndex();

        if (pos == -1) {
            error = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
        } else if (byColumn && !table.isNumeric(pos, true)) {
            error = "Column " + m_Index + " is not numeric!";
        } else if (byColumn) {
            values = new Double[table.getRowCount()];
            for (int r = 0; r < values.length; r++) {
                Cell current = table.getCell(r, pos);
                if ((current != null) && !current.isMissing())
                    values[r] = current.toDouble();
            }
        } else {
            values = new Double[table.getColumnCount()];
            for (int c = 0; c < values.length; c++) {
                Cell current = table.getCell(pos, c);
                if ((current != null) && !current.isMissing())
                    values[c] = current.toDouble();
            }
        }
    }

    if (values != null)
        m_OutputToken = new Token(values);

    return error;
}

From source file:adams.flow.transformer.WekaGetInstancesValue.java

License:Open Source License

/**
 * Executes the flow item./*from w w  w.  ja v a 2s.  c  om*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int row;

    result = null;

    inst = (Instances) m_InputToken.getPayload();
    m_Column.setData(inst);
    m_Row.setMax(inst.numInstances());
    index = m_Column.getIntIndex();
    row = m_Row.getIntIndex();

    if (row == -1)
        result = "Failed to retrieve row: " + m_Row.getIndex();
    else if (index == -1)
        result = "Failed to retrieve column: " + m_Column.getIndex();

    if (result == null) {
        try {
            if (inst.instance(row).isMissing(index)) {
                m_OutputToken = new Token("?");
            } else {
                switch (inst.attribute(index).type()) {
                case Attribute.NUMERIC:
                    m_OutputToken = new Token(inst.instance(row).value(index));
                    break;

                case Attribute.DATE:
                case Attribute.NOMINAL:
                case Attribute.STRING:
                case Attribute.RELATIONAL:
                    m_OutputToken = new Token(inst.instance(row).stringValue(index));
                    break;

                default:
                    result = "Unhandled attribute type: " + inst.attribute(index).type();
                }
            }
        } catch (Exception e) {
            result = handleException("Failed to obtain value from dataset:", e);
        }
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstanceDumper.java

License:Open Source License

/**
 * Turns the dataset header into the appropriate format.
 *
 * @param header   the header to convert
 * @return      the generated output/*from   ww  w  .j  av  a  2s . c om*/
 */
protected String createHeader(Instances header) {
    StringBuilder result;
    int i;

    result = new StringBuilder();

    switch (m_OutputFormat) {
    case ARFF:
        result.append(new Instances(header, 0).toString());
        break;

    case CSV:
        for (i = 0; i < header.numAttributes(); i++) {
            if (i > 0)
                result.append(",");
            result.append(Utils.quote(header.attribute(i).name()));
        }
        break;

    case TAB:
        for (i = 0; i < header.numAttributes(); i++) {
            if (i > 0)
                result.append("\t");
            result.append(Utils.quote(header.attribute(i).name()));
        }
        break;

    default:
        throw new IllegalStateException("Unhandled output format: " + m_OutputFormat);
    }

    return result.toString();
}

From source file:adams.flow.transformer.WekaInstanceEvaluator.java

License:Open Source License

/**
 * Determines the name of the evaluation attribute.
 *
 * @param data   the original input data
 * @return      the generated name/*from  www . j av  a 2 s  .  co  m*/
 * @see      #m_AttributeName
 */
protected String determineAttributeName(Instances data) {
    String result;
    int i;

    result = ATTRIBUTE_NAME;

    i = 0;
    while (data.attribute(result) != null) {
        i++;
        result = ATTRIBUTE_NAME + i;
    }

    m_AttributeName = result;
    if (isLoggingEnabled())
        getLogger().info("Chosen attribute name: " + m_AttributeName);

    return result;
}

From source file:adams.flow.transformer.WekaInstancesAppend.java

License:Open Source License

/**
 * Executes the flow item./*from w  w  w  . ja v a  2 s . c  o m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    String[] filesStr;
    File[] files;
    int i;
    int n;
    Instances[] inst;
    Instances full;
    String msg;
    StringBuilder relation;
    double[] values;

    result = null;

    // get filenames
    files = null;
    inst = null;
    if (m_InputToken.getPayload() instanceof String[]) {
        filesStr = (String[]) m_InputToken.getPayload();
        files = new File[filesStr.length];
        for (i = 0; i < filesStr.length; i++)
            files[i] = new PlaceholderFile(filesStr[i]);
    } else if (m_InputToken.getPayload() instanceof File[]) {
        files = (File[]) m_InputToken.getPayload();
    } else if (m_InputToken.getPayload() instanceof Instances[]) {
        inst = (Instances[]) m_InputToken.getPayload();
    } else {
        throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
    }

    // load data?
    if (files != null) {
        inst = new Instances[files.length];
        for (i = 0; i < files.length; i++) {
            try {
                inst[i] = DataSource.read(files[i].getAbsolutePath());
            } catch (Exception e) {
                result = handleException("Failed to load dataset: " + files[i], e);
                break;
            }
        }
    }

    // test compatibility
    if (result == null) {
        for (i = 0; i < inst.length - 1; i++) {
            for (n = i + 1; n < inst.length; n++) {
                if ((msg = inst[i].equalHeadersMsg(inst[n])) != null) {
                    result = "Dataset #" + (i + 1) + " and #" + (n + 1) + " are not compatible:\n" + msg;
                    break;
                }
            }
            if (result != null)
                break;
        }
    }

    // append
    if (result == null) {
        full = new Instances(inst[0]);
        relation = new StringBuilder(inst[0].relationName());
        for (i = 1; i < inst.length; i++) {
            relation.append("+" + inst[i].relationName());
            for (Instance row : inst[i]) {
                values = row.toDoubleArray();
                for (n = 0; n < values.length; n++) {
                    if (row.attribute(n).isString())
                        values[n] = full.attribute(n).addStringValue(row.stringValue(n));
                    else if (row.attribute(n).isRelationValued())
                        values[n] = full.attribute(n).addRelation(row.relationalValue(n));
                }
                if (row instanceof SparseInstance)
                    row = new SparseInstance(row.weight(), values);
                else
                    row = new DenseInstance(row.weight(), values);
                full.add(row);
            }
        }
        full.setRelationName(relation.toString());
        m_OutputToken = new Token(full);
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstancesHistogramRanges.java

License:Open Source License

/**
 * Computes the histogram ranges for the configured rows/columns of the
 * incoming dataset and queues the column names of the resulting
 * spreadsheet for output.
 * <p>
 * For regular expression locations, only the first attribute with a
 * matching name is used.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String error = null;
    SpreadSheet ranges = null;

    m_Queue.clear();

    try {
        Instances dataset = (Instances) m_InputToken.getPayload();

        ArrayHistogram histogram = new ArrayHistogram();
        histogram.setBinCalculation(m_BinCalculation);
        histogram.setNumBins(m_NumBins);
        histogram.setBinWidth(m_BinWidth);
        histogram.setNormalize(m_Normalize);
        histogram.setUseFixedMinMax(m_UseFixedMinMax);
        histogram.setManualMin(m_ManualMin);
        histogram.setManualMax(m_ManualMax);
        histogram.setDisplayRanges(true);
        histogram.setNumDecimals(m_NumDecimals);

        for (int loc = 0; loc < m_Locations.length; loc++) {
            switch (m_DataType) {
            case ROW_BY_INDEX: {
                Index rowIndex = new Index(m_Locations[loc].stringValue());
                rowIndex.setMax(dataset.numInstances());
                double[] rowValues = dataset.instance(rowIndex.getIntIndex()).toDoubleArray();
                histogram.add(StatUtils.toNumberArray(rowValues));
                break;
            }

            case COLUMN_BY_INDEX: {
                WekaAttributeIndex colIndex = new WekaAttributeIndex(m_Locations[loc].stringValue());
                colIndex.setData(dataset);
                double[] colValues = dataset.attributeToDoubleArray(colIndex.getIntIndex());
                histogram.add(StatUtils.toNumberArray(colValues));
                break;
            }

            case COLUMN_BY_REGEXP: {
                // only the first matching attribute gets added
                int att = 0;
                boolean matched = false;
                while (!matched && (att < dataset.numAttributes())) {
                    if (dataset.attribute(att).name().matches(m_Locations[loc].stringValue())) {
                        histogram.add(StatUtils.toNumberArray(dataset.attributeToDoubleArray(att)));
                        matched = true;
                    }
                    att++;
                }
                break;
            }

            default:
                throw new IllegalStateException("Unhandled data type: " + m_DataType);
            }
        }

        ranges = histogram.calculate().toSpreadSheet();
    } catch (Exception e) {
        error = handleException("Error generating the ranges: ", e);
    }

    if (ranges != null) {
        for (int col = 0; col < ranges.getColumnCount(); col++)
            m_Queue.add(ranges.getColumnName(col));
    }

    return error;
}

From source file:adams.flow.transformer.WekaInstancesInfo.java

License:Open Source License

/**
 * Generates attribute statistics as a two-column spreadsheet
 * ("Statistic", "Value").
 * <p>
 * Nominal, date and numeric attributes each get their own set of
 * statistics; for any other attribute type only the header is generated.
 * 
 * @param data   the dataset to use
 * @param index   the 0-based index of the attribute
 * @return      the spreadsheet with the statistics
 */
protected SpreadSheet getAttributeStats(Instances data, int index) {
    SpreadSheet sheet = new DefaultSpreadSheet();
    sheet.setName("Attribute statistics - #" + (index + 1) + " " + data.attribute(index).name());

    // header
    Row header = sheet.getHeaderRow();
    header.addCell("S").setContent("Statistic");
    header.addCell("V").setContent("Value");

    // data
    Attribute att = data.attribute(index);

    if (att.isNominal()) {
        AttributeStats stats = data.attributeStats(index);
        addStatistic(sheet, "Total", stats.totalCount);
        addStatistic(sheet, "Missing", stats.missingCount);
        addStatistic(sheet, "Unique", stats.uniqueCount);
        addStatistic(sheet, "Distinct", stats.distinctCount);
        addStatistic(sheet, "Integer-like", stats.intCount);
        addStatistic(sheet, "Float-like", stats.realCount);
        for (int label = 0; label < stats.nominalCounts.length; label++)
            addStatistic(sheet, "Label-" + (label + 1) + "-" + att.value(label), stats.nominalCounts[label]);
        for (int label = 0; label < stats.nominalWeights.length; label++)
            addStatistic(sheet, "Weight-" + (label + 1) + "-" + att.value(label), stats.nominalWeights[label]);
        return sheet;
    }

    // date attributes are tested before the numeric ones
    if (att.isDate()) {
        if (m_DateFormat == null)
            m_DateFormat = DateUtils.getTimestampFormatter();
        AttributeStats stats = data.attributeStats(index);
        addStatistic(sheet, "Count", stats.numericStats.count);
        addStatistic(sheet, "Min", formatDate(stats.numericStats.min));
        addStatistic(sheet, "Max", formatDate(stats.numericStats.max));
        addStatistic(sheet, "Mean", formatDate(stats.numericStats.mean));
        addStatistic(sheet, "StdDev (in days)", stats.numericStats.stdDev / 1000 / 60 / 60 / 24);
        return sheet;
    }

    if (att.isNumeric()) {
        AttributeStats stats = data.attributeStats(index);
        addStatistic(sheet, "Count", stats.numericStats.count);
        addStatistic(sheet, "Min", stats.numericStats.min);
        addStatistic(sheet, "Max", stats.numericStats.max);
        addStatistic(sheet, "Mean", stats.numericStats.mean);
        addStatistic(sheet, "StdDev", stats.numericStats.stdDev);
        addStatistic(sheet, "Sum", stats.numericStats.sum);
        addStatistic(sheet, "Sum^2", stats.numericStats.sumSq);
    }

    return sheet;
}

From source file:adams.flow.transformer.WekaInstancesInfo.java

License:Open Source License

/**
 * Collects the requested information about the incoming dataset (or the
 * dataset of the incoming instance) and queues it for output.
 * <p>
 * Information that depends on the attribute index or the class attribute
 * is silently skipped if the index cannot be resolved or no class
 * attribute is set, i.e., nothing gets queued in that case.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int labelIndex;
    double[] dist;
    Enumeration<?> enm;
    AttributeStats stats;
    int i;

    result = null;

    if (m_InputToken.getPayload() instanceof Instance)
        inst = ((Instance) m_InputToken.getPayload()).dataset();
    else
        inst = (Instances) m_InputToken.getPayload();
    m_AttributeIndex.setData(inst);
    index = m_AttributeIndex.getIntIndex();

    m_Queue.clear();

    switch (m_Type) {
    case FULL:
        m_Queue.add(inst.toSummaryString());
        break;

    case FULL_ATTRIBUTE:
        // guard like the other attribute-based types: an unresolved index
        // would otherwise fail when accessing the attribute
        if (index != -1)
            m_Queue.add(getAttributeStats(inst, index));
        break;

    case FULL_CLASS:
        if (inst.classIndex() > -1)
            m_Queue.add(getAttributeStats(inst, inst.classIndex()));
        break;

    case HEADER:
        m_Queue.add(new Instances(inst, 0).toString());
        break;

    case RELATION_NAME:
        m_Queue.add(inst.relationName());
        break;

    case ATTRIBUTE_NAME:
        if (index != -1)
            m_Queue.add(inst.attribute(index).name());
        break;

    case ATTRIBUTE_NAMES:
        for (i = 0; i < inst.numAttributes(); i++)
            m_Queue.add(inst.attribute(i).name());
        break;

    case LABELS:
        if (index != -1) {
            enm = inst.attribute(index).enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case CLASS_LABELS:
        if (inst.classIndex() > -1) {
            enm = inst.classAttribute().enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case LABEL_COUNT:
        if (index > -1) {
            m_LabelIndex.setData(inst.attribute(index));
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
        }
        break;

    case LABEL_COUNTS:
        if (index > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
        break;

    case LABEL_DISTRIBUTION:
        if (index > -1) {
            // compute the statistics only once instead of per label
            stats = inst.attributeStats(index);
            dist = new double[stats.nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = stats.nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case CLASS_LABEL_COUNT:
        if (inst.classIndex() > -1) {
            m_LabelIndex.setData(inst.classAttribute());
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
        }
        break;

    case CLASS_LABEL_COUNTS:
        if (inst.classIndex() > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
        break;

    case CLASS_LABEL_DISTRIBUTION:
        if (inst.classIndex() > -1) {
            // compute the statistics only once instead of per label
            stats = inst.attributeStats(inst.classIndex());
            dist = new double[stats.nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = stats.nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case NUM_ATTRIBUTES:
        m_Queue.add(inst.numAttributes());
        break;

    case NUM_INSTANCES:
        m_Queue.add(inst.numInstances());
        break;

    case NUM_CLASS_LABELS:
        if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
            m_Queue.add(inst.classAttribute().numValues());
        break;

    case NUM_LABELS:
        if ((index != -1) && inst.attribute(index).isNominal())
            m_Queue.add(inst.attribute(index).numValues());
        break;

    case NUM_DISTINCT_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).distinctCount);
        break;

    case NUM_UNIQUE_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).uniqueCount);
        break;

    case NUM_MISSING_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).missingCount);
        break;

    case MIN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.min);
        break;

    case MAX:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.max);
        break;

    case MEAN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.mean);
        break;

    case STDEV:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
        break;

    case ATTRIBUTE_TYPE:
        if (index != -1)
            m_Queue.add(Attribute.typeToString(inst.attribute(index)));
        break;

    case CLASS_TYPE:
        if (inst.classIndex() != -1)
            m_Queue.add(Attribute.typeToString(inst.classAttribute()));
        break;

    default:
        result = "Unhandled info type: " + m_Type;
    }

    return result;
}