Example usage for weka.core.Instances.numInstances()

List of usage examples for weka.core.Instances.numInstances()

Introduction

On this page you can find example usages of weka.core.Instances.numInstances().

Prototype


public int numInstances()

Document

Returns the number of instances in the dataset.
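
Before the project examples under "Usage" below, here is a minimal, self-contained sketch of the typical pattern: load a dataset and use numInstances() to bound an index-based loop over its rows. The ARFF path is a placeholder and not taken from the examples; any file readable by Weka's ConverterUtils.DataSource will do.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumInstancesExample {

    public static void main(String[] args) throws Exception {
        // Placeholder path -- replace with a dataset of your own.
        Instances data = DataSource.read("data/iris.arff");

        // numInstances() returns the number of rows currently held by the dataset.
        int numRows = data.numInstances();
        System.out.println("Relation '" + data.relationName() + "' contains " + numRows + " instances.");

        // Typical usage: bound an index-based loop over the rows.
        for (int i = 0; i < numRows; i++) {
            Instance row = data.instance(i);
            System.out.println(i + ": " + row);
        }
    }
}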

Usage

From source file: adams.flow.sink.WekaThresholdCurve.java

License: Open Source License

/**
 * Creates a new panel for the token.
 *
 * @param token   the token to display in a new panel, can be null
 * @return      the generated panel
 */
public AbstractDisplayPanel createDisplayPanel(Token token) {
    AbstractDisplayPanel result;
    String name;

    if (token != null)
        name = "Threshold curve (" + getEvaluation(token).getHeader().relationName() + ")";
    else
        name = "Threshold curve";

    result = new AbstractComponentDisplayPanel(name) {
        private static final long serialVersionUID = -7362768698548152899L;
        protected ThresholdVisualizePanel m_VisualizePanel;

        @Override
        protected void initGUI() {
            super.initGUI();
            setLayout(new BorderLayout());
            m_VisualizePanel = new ThresholdVisualizePanel();
            add(m_VisualizePanel, BorderLayout.CENTER);
        }

        @Override
        public void display(Token token) {
            try {
                Evaluation eval = getEvaluation(token);
                m_ClassLabelRange.setMax(eval.getHeader().classAttribute().numValues());
                int[] indices = m_ClassLabelRange.getIntIndices();
                for (int index : indices) {
                    ThresholdCurve curve = new ThresholdCurve();
                    Instances data = curve.getCurve(eval.predictions(), index);
                    PlotData2D plot = new PlotData2D(data);
                    plot.setPlotName(eval.getHeader().classAttribute().value(index));
                    plot.m_displayAllPoints = true;
                    boolean[] connectPoints = new boolean[data.numInstances()];
                    for (int cp = 1; cp < connectPoints.length; cp++)
                        connectPoints[cp] = true;
                    plot.setConnectPoints(connectPoints);
                    m_VisualizePanel.addPlot(plot);
                    if (data.attribute(m_AttributeX.toDisplay()) != null)
                        m_VisualizePanel.setXIndex(data.attribute(m_AttributeX.toDisplay()).index());
                    if (data.attribute(m_AttributeY.toDisplay()) != null)
                        m_VisualizePanel.setYIndex(data.attribute(m_AttributeY.toDisplay()).index());
                }
            } catch (Exception e) {
                getLogger().log(Level.SEVERE, "Failed to display token: " + token, e);
            }
        }

        @Override
        public JComponent supplyComponent() {
            return m_VisualizePanel;
        }

        @Override
        public void clearPanel() {
            m_VisualizePanel.removeAllPlots();
        }

        public void cleanUp() {
            m_VisualizePanel.removeAllPlots();
        }
    };

    if (token != null)
        result.display(token);

    return result;
}

From source file: adams.flow.transformer.WekaCrossValidationClustererEvaluator.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    weka.clusterers.Clusterer cls;
    int folds;
    MakeDensityBasedClusterer make;
    double log;

    result = null;

    try {
        // evaluate classifier
        cls = getClustererInstance();
        if (cls == null)
            throw new IllegalStateException("Clusterer '" + getClusterer() + "' not found!");

        data = (Instances) m_InputToken.getPayload();
        folds = m_Folds;
        if (folds == -1)
            folds = data.numInstances();
        if (!(cls instanceof DensityBasedClusterer)) {
            make = new MakeDensityBasedClusterer();
            make.setClusterer(cls);
            cls = make;
        }
        log = ClusterEvaluation.crossValidateModel((DensityBasedClusterer) cls, data, folds,
                new Random(m_Seed));
        m_OutputToken = new Token(new WekaClusterEvaluationContainer(log));
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to cross-validate clusterer: ", e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}

From source file: adams.flow.transformer.WekaExtractArray.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Double[] array;
    Instances inst;
    SpreadSheet sheet;
    int i;
    int index;
    Cell cell;

    result = null;

    array = null;
    if (m_InputToken.getPayload() instanceof Instances) {
        inst = (Instances) m_InputToken.getPayload();

        if (m_Type == ExtractionType.COLUMN)
            m_Index.setMax(inst.numAttributes());
        else
            m_Index.setMax(inst.numInstances());
        index = m_Index.getIntIndex();

        if (index == -1)
            result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
        else if ((m_Type == ExtractionType.COLUMN) && !inst.attribute(index).isNumeric())
            result = "Column " + m_Index + " is not numeric!";

        if (result == null) {
            if (m_Type == ExtractionType.COLUMN) {
                array = new Double[inst.numInstances()];
                for (i = 0; i < array.length; i++)
                    array[i] = inst.instance(i).value(index);
            } else {
                array = new Double[inst.numAttributes()];
                for (i = 0; i < array.length; i++)
                    array[i] = inst.instance(index).value(i);
            }
        }
    } else {
        sheet = (SpreadSheet) m_InputToken.getPayload();

        if (m_Type == ExtractionType.COLUMN)
            m_Index.setMax(sheet.getColumnCount());
        else
            m_Index.setMax(sheet.getRowCount());
        index = m_Index.getIntIndex();

        if (index == -1)
            result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
        else if ((m_Type == ExtractionType.COLUMN) && !sheet.isNumeric(index, true))
            result = "Column " + m_Index + " is not numeric!";

        if (result == null) {
            if (m_Type == ExtractionType.COLUMN) {
                array = new Double[sheet.getRowCount()];
                for (i = 0; i < array.length; i++) {
                    cell = sheet.getCell(i, index);
                    if ((cell != null) && !cell.isMissing())
                        array[i] = cell.toDouble();
                }
            } else {
                array = new Double[sheet.getColumnCount()];
                for (i = 0; i < array.length; i++) {
                    cell = sheet.getCell(index, i);
                    if ((cell != null) && !cell.isMissing())
                        array[i] = cell.toDouble();
                }
            }
        }
    }

    if (array != null)
        m_OutputToken = new Token(array);

    return result;
}

From source file: adams.flow.transformer.WekaFilter.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    weka.core.Instances data;
    weka.core.Instances filteredData;
    weka.core.Instance inst;
    adams.data.instance.Instance instA;
    weka.core.Instance filteredInst;
    String relation;

    result = null;

    data = null;
    inst = null;
    if (m_InputToken.hasPayload(weka.core.Instance.class))
        inst = m_InputToken.getPayload(weka.core.Instance.class);
    else if (m_InputToken.hasPayload(adams.data.instance.Instance.class))
        inst = m_InputToken.getPayload(adams.data.instance.Instance.class).toInstance();
    else if (m_InputToken.hasPayload(weka.core.Instances.class))
        data = m_InputToken.getPayload(weka.core.Instances.class);
    else
        result = m_InputToken.unhandledData();

    if (result == null) {
        try {
            // initialize filter?
            if (!m_Initialized || !m_InitializeOnce) {
                if (data == null) {
                    data = new weka.core.Instances(inst.dataset(), 0);
                    data.add(inst);
                }
                initActualFilter(data);
            }

            synchronized (m_ActualFilter) {
                if (!m_FlowContextUpdated) {
                    m_FlowContextUpdated = true;
                    if (m_ActualFilter instanceof FlowContextHandler)
                        ((FlowContextHandler) m_ActualFilter).setFlowContext(this);
                }

                // filter data
                filteredData = null;
                filteredInst = null;
                if (data != null) {
                    relation = data.relationName();
                    filteredData = weka.filters.Filter.useFilter(data, m_ActualFilter);
                    if (m_KeepRelationName) {
                        filteredData.setRelationName(relation);
                        if (isLoggingEnabled())
                            getLogger().info("Setting relation name: " + relation);
                    }
                    m_Initialized = true;
                } else {
                    relation = inst.dataset().relationName();
                    m_ActualFilter.input(inst);
                    m_ActualFilter.batchFinished();
                    filteredInst = m_ActualFilter.output();
                    if (m_KeepRelationName) {
                        filteredInst.dataset().setRelationName(relation);
                        if (isLoggingEnabled())
                            getLogger().info("Setting relation name: " + relation);
                    }
                }
            }

            // build output token
            if (inst != null) {
                if (filteredInst != null) {
                    if (m_InputToken.getPayload() instanceof weka.core.Instance) {
                        m_OutputToken = new Token(filteredInst);
                    } else {
                        instA = new adams.data.instance.Instance();
                        instA.set(filteredInst);
                        m_OutputToken = createToken(m_InputToken.getPayload(), instA);
                    }
                } else if ((filteredData != null) && (filteredData.numInstances() > 0)) {
                    m_OutputToken = createToken(m_InputToken.getPayload(), filteredData.instance(0));
                }
            } else {
                m_OutputToken = createToken(m_InputToken.getPayload(), filteredData);
            }
        } catch (Exception e) {
            result = handleException("Failed to filter data: ", e);
        }
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}

From source file: adams.flow.transformer.WekaGetInstancesValue.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int row;

    result = null;

    inst = (Instances) m_InputToken.getPayload();
    m_Column.setData(inst);
    m_Row.setMax(inst.numInstances());
    index = m_Column.getIntIndex();
    row = m_Row.getIntIndex();

    if (row == -1)
        result = "Failed to retrieve row: " + m_Row.getIndex();
    else if (index == -1)
        result = "Failed to retrieve column: " + m_Column.getIndex();

    if (result == null) {
        try {
            if (inst.instance(row).isMissing(index)) {
                m_OutputToken = new Token("?");
            } else {
                switch (inst.attribute(index).type()) {
                case Attribute.NUMERIC:
                    m_OutputToken = new Token(inst.instance(row).value(index));
                    break;

                case Attribute.DATE:
                case Attribute.NOMINAL:
                case Attribute.STRING:
                case Attribute.RELATIONAL:
                    m_OutputToken = new Token(inst.instance(row).stringValue(index));
                    break;

                default:
                    result = "Unhandled attribute type: " + inst.attribute(index).type();
                }
            }
        } catch (Exception e) {
            result = handleException("Failed to obtain value from dataset:", e);
        }
    }

    return result;
}

From source file: adams.flow.transformer.WekaInstancesHistogramRanges.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    SpreadSheet sheet;
    Instances data;
    int i;
    int n;
    Index index;
    ArrayHistogram stat;

    result = null;
    m_Queue.clear();

    try {
        sheet = null;
        data = (Instances) m_InputToken.getPayload();
        stat = new ArrayHistogram();
        stat.setBinCalculation(m_BinCalculation);
        stat.setNumBins(m_NumBins);
        stat.setBinWidth(m_BinWidth);
        stat.setNormalize(m_Normalize);
        stat.setUseFixedMinMax(m_UseFixedMinMax);
        stat.setManualMin(m_ManualMin);
        stat.setManualMax(m_ManualMax);
        stat.setDisplayRanges(true);
        stat.setNumDecimals(m_NumDecimals);

        for (i = 0; i < m_Locations.length; i++) {
            switch (m_DataType) {
            case ROW_BY_INDEX:
                index = new Index(m_Locations[i].stringValue());
                index.setMax(data.numInstances());
                stat.add(StatUtils.toNumberArray(data.instance(index.getIntIndex()).toDoubleArray()));
                break;

            case COLUMN_BY_INDEX:
                index = new WekaAttributeIndex(m_Locations[i].stringValue());
                ((WekaAttributeIndex) index).setData(data);
                stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(index.getIntIndex())));
                break;

            case COLUMN_BY_REGEXP:
                for (n = 0; n < data.numAttributes(); n++) {
                    if (data.attribute(n).name().matches(m_Locations[i].stringValue())) {
                        stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n)));
                        break;
                    }
                }
                break;

            default:
                throw new IllegalStateException("Unhandled data type: " + m_DataType);
            }
        }

        sheet = stat.calculate().toSpreadSheet();
    } catch (Exception e) {
        result = handleException("Error generating the ranges: ", e);
        sheet = null;
    }

    if (sheet != null) {
        for (i = 0; i < sheet.getColumnCount(); i++)
            m_Queue.add(sheet.getColumnName(i));
    }

    return result;
}

From source file: adams.flow.transformer.WekaInstancesInfo.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int labelIndex;
    double[] dist;
    Enumeration enm;
    int i;

    result = null;

    if (m_InputToken.getPayload() instanceof Instance)
        inst = ((Instance) m_InputToken.getPayload()).dataset();
    else
        inst = (Instances) m_InputToken.getPayload();
    m_AttributeIndex.setData(inst);
    index = m_AttributeIndex.getIntIndex();

    m_Queue.clear();

    switch (m_Type) {
    case FULL:
        m_Queue.add(inst.toSummaryString());
        break;

    case FULL_ATTRIBUTE:
        m_Queue.add(getAttributeStats(inst, index));
        break;

    case FULL_CLASS:
        if (inst.classIndex() > -1)
            m_Queue.add(getAttributeStats(inst, inst.classIndex()));
        break;

    case HEADER:
        m_Queue.add(new Instances(inst, 0).toString());
        break;

    case RELATION_NAME:
        m_Queue.add(inst.relationName());
        break;

    case ATTRIBUTE_NAME:
        if (index != -1)
            m_Queue.add(inst.attribute(index).name());
        break;

    case ATTRIBUTE_NAMES:
        for (i = 0; i < inst.numAttributes(); i++)
            m_Queue.add(inst.attribute(i).name());
        break;

    case LABELS:
        if (index != -1) {
            enm = inst.attribute(index).enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case CLASS_LABELS:
        if (inst.classIndex() > -1) {
            enm = inst.classAttribute().enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case LABEL_COUNT:
        if (index > -1) {
            m_LabelIndex.setData(inst.attribute(index));
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
        }
        break;

    case LABEL_COUNTS:
        if (index > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
        break;

    case LABEL_DISTRIBUTION:
        if (index > -1) {
            dist = new double[inst.attributeStats(index).nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = inst.attributeStats(index).nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case CLASS_LABEL_COUNT:
        if (inst.classIndex() > -1) {
            m_LabelIndex.setData(inst.classAttribute());
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
        }
        break;

    case CLASS_LABEL_COUNTS:
        if (inst.classIndex() > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
        break;

    case CLASS_LABEL_DISTRIBUTION:
        if (inst.classIndex() > -1) {
            dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case NUM_ATTRIBUTES:
        m_Queue.add(inst.numAttributes());
        break;

    case NUM_INSTANCES:
        m_Queue.add(inst.numInstances());
        break;

    case NUM_CLASS_LABELS:
        if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
            m_Queue.add(inst.classAttribute().numValues());
        break;

    case NUM_LABELS:
        if ((index != -1) && inst.attribute(index).isNominal())
            m_Queue.add(inst.attribute(index).numValues());
        break;

    case NUM_DISTINCT_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).distinctCount);
        break;

    case NUM_UNIQUE_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).uniqueCount);
        break;

    case NUM_MISSING_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).missingCount);
        break;

    case MIN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.min);
        break;

    case MAX:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.max);
        break;

    case MEAN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.mean);
        break;

    case STDEV:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
        break;

    case ATTRIBUTE_TYPE:
        if (index != -1)
            m_Queue.add(Attribute.typeToString(inst.attribute(index)));
        break;

    case CLASS_TYPE:
        if (inst.classIndex() != -1)
            m_Queue.add(Attribute.typeToString(inst.classAttribute()));
        break;

    default:
        result = "Unhandled info type: " + m_Type;
    }

    return result;
}

From source file: adams.flow.transformer.WekaInstancesMerge.java

License: Open Source License

/**
 * Prefixes the attributes.
 *
 * @param index   the index of the dataset
 * @param inst   the data to process
 * @return      the processed data
 */
protected Instances prefixAttributes(Instances inst, int index) {
    Instances result;
    String prefix;
    ArrayList<Attribute> atts;
    int i;

    prefix = createPrefix(inst, index);

    // header
    atts = new ArrayList<>();
    for (i = 0; i < inst.numAttributes(); i++)
        atts.add(inst.attribute(i).copy(prefix + inst.attribute(i).name()));

    // data
    result = new Instances(inst.relationName(), atts, inst.numInstances());
    result.setClassIndex(inst.classIndex());
    for (i = 0; i < inst.numInstances(); i++)
        result.add((Instance) inst.instance(i).copy());

    return result;
}

From source file: adams.flow.transformer.WekaInstancesMerge.java

License: Open Source License

/**
 * Updates the IDs in the hashset with the ones stored in the ID attribute
 * of the provided dataset.
 *
 * @param instIndex    the dataset index
 * @param inst   the dataset to obtain the IDs from
 * @param ids      the hashset to store the IDs in
 */
protected void updateIDs(int instIndex, Instances inst, HashSet ids) {
    Attribute att;
    int i;
    boolean numeric;
    HashSet current;
    Object id;

    att = inst.attribute(m_UniqueID);
    if (att == null)
        throw new IllegalStateException("Attribute '" + m_UniqueID + "' not found in relation '"
                + inst.relationName() + "' (#" + (instIndex + 1) + ")!");

    // determine/check type
    if (m_AttType == -1) {
        if ((att.type() == Attribute.NUMERIC) || (att.type() == Attribute.STRING))
            m_AttType = att.type();
        else
            throw new IllegalStateException("Attribute '" + m_UniqueID + "' must be either NUMERIC or STRING (#"
                    + (instIndex + 1) + ")!");
    } else {
        if (m_AttType != att.type())
            throw new IllegalStateException("Attribute '" + m_UniqueID
                    + "' must have same attribute type in all the datasets (#" + (instIndex + 1) + ")!");
    }

    // get IDs
    numeric = m_AttType == Attribute.NUMERIC;
    current = new HashSet();
    for (i = 0; i < inst.numInstances(); i++) {
        if (numeric)
            id = inst.instance(i).value(att);
        else
            id = inst.instance(i).stringValue(att);
        if (m_Strict && current.contains(id))
            throw new IllegalStateException(
                    "ID '" + id + "' is not unique in dataset #" + (instIndex + 1) + "!");
        current.add(id);
    }
    ids.addAll(current);
}

From source file: adams.flow.transformer.WekaInstancesStatistic.java

License: Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    SpreadSheet sheet;
    Instances data;
    int i;
    int n;
    Index index;
    AbstractArrayStatistic stat;

    result = null;

    try {
        sheet = null;
        data = (Instances) m_InputToken.getPayload();
        stat = m_Statistic.shallowCopy(true);

        for (i = 0; i < m_Locations.length; i++) {
            switch (m_DataType) {
            case ROW_BY_INDEX:
                index = new Index(m_Locations[i].stringValue());
                index.setMax(data.numInstances());
                stat.add(StatUtils.toNumberArray(data.instance(index.getIntIndex()).toDoubleArray()));
                break;

            case COLUMN_BY_INDEX:
                index = new WekaAttributeIndex(m_Locations[i].stringValue());
                ((WekaAttributeIndex) index).setData(data);
                stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(index.getIntIndex())));
                break;

            case COLUMN_BY_REGEXP:
                for (n = 0; n < data.numAttributes(); n++) {
                    if (data.attribute(n).name().matches(m_Locations[i].stringValue())) {
                        stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n)));
                        break;
                    }
                }
                break;

            default:
                throw new IllegalStateException("Unhandled data type: " + m_DataType);
            }
        }

        sheet = stat.calculate().toSpreadSheet();
    } catch (Exception e) {
        result = handleException("Error generating the statistic: ", e);
        sheet = null;
    }

    if (sheet != null)
        m_OutputToken = new Token(sheet);

    return result;
}