Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



public Instance instance(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:adams.data.outlier.AbstractInstanceOutlierDetectorTestCase.java

License:Open Source License

/**
 * Reads a test resource and wraps the first row of the resulting dataset.
 * The resource is copied into the temp directory for reading and removed
 * again afterwards, regardless of whether loading succeeded.
 *
 * @param filename   the filename to load (without path)
 * @return      the first instance in the dataset, or null if it failed
 *          to load; class attribute is always the last
 */
protected Instance load(String filename) {
    Instance first = new Instance();

    try {
        m_TestHelper.copyResourceToTmp(filename);
        Instances dataset = DataSource.read(new TmpFile(filename).getAbsolutePath());
        // the class attribute is always taken to be the last one
        int lastAttribute = dataset.numAttributes() - 1;
        dataset.setClassIndex(lastAttribute);
        first.set(dataset.instance(0));
    } catch (Exception e) {
        e.printStackTrace();
        first = null;
    } finally {
        // remove the temporary copy whether or not loading worked
        m_TestHelper.deleteFileFromTmp(filename);
    }

    return first;
}

From source file:adams.flow.transformer.WekaBootstrapping.java

License:Open Source License

/**
 * Executes the flow item./*from   w w w.j  a  v  a 2s .c  o m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    SpreadSheet sheet;
    Row row;
    Evaluation evalAll;
    Evaluation eval;
    WekaEvaluationContainer cont;
    TIntList indices;
    Random random;
    int i;
    int iteration;
    int size;
    List<Prediction> preds;
    Instances header;
    Instances data;
    ArrayList<Attribute> atts;
    Instance inst;
    boolean numeric;
    int classIndex;
    Double[] errors;
    Double[] errorsRev;
    Percentile<Double> perc;
    Percentile<Double> percRev;
    TIntList subset;

    result = null;

    if (m_InputToken.getPayload() instanceof Evaluation) {
        evalAll = (Evaluation) m_InputToken.getPayload();
    } else {
        cont = (WekaEvaluationContainer) m_InputToken.getPayload();
        evalAll = (Evaluation) cont.getValue(WekaEvaluationContainer.VALUE_EVALUATION);
    }

    if ((evalAll.predictions() == null) || (evalAll.predictions().size() == 0))
        result = "No predictions available!";

    if (result == null) {
        // init spreadsheet
        sheet = new DefaultSpreadSheet();
        row = sheet.getHeaderRow();
        row.addCell("S").setContentAsString("Subsample");
        for (EvaluationStatistic s : m_StatisticValues)
            row.addCell(s.toString()).setContentAsString(s.toString());
        for (i = 0; i < m_Percentiles.length; i++) {
            switch (m_ErrorCalculation) {
            case ACTUAL_MINUS_PREDICTED:
                row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]);
                break;
            case PREDICTED_MINUS_ACTUAL:
                row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]);
                break;
            case ABSOLUTE:
                row.addCell("perc-Abs-" + i).setContentAsString("Percentile-Abs-" + m_Percentiles[i]);
                break;
            case BOTH:
                row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]);
                row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]);
                break;
            default:
                throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation);
            }
        }

        // set up bootstrapping
        preds = evalAll.predictions();
        random = new Random(m_Seed);
        indices = new TIntArrayList();
        size = (int) Math.round(preds.size() * m_Percentage);
        header = evalAll.getHeader();
        numeric = header.classAttribute().isNumeric();
        m_ClassIndex.setData(header.classAttribute());
        if (numeric)
            classIndex = -1;
        else
            classIndex = m_ClassIndex.getIntIndex();
        for (i = 0; i < preds.size(); i++)
            indices.add(i);

        // create fake evalutions
        subset = new TIntArrayList();
        for (iteration = 0; iteration < m_NumSubSamples; iteration++) {
            if (isStopped()) {
                sheet = null;
                break;
            }

            // determine
            subset.clear();
            if (m_WithReplacement) {
                for (i = 0; i < size; i++)
                    subset.add(indices.get(random.nextInt(preds.size())));
            } else {
                indices.shuffle(random);
                for (i = 0; i < size; i++)
                    subset.add(indices.get(i));
            }

            // create dataset from predictions
            errors = new Double[size];
            errorsRev = new Double[size];
            atts = new ArrayList<>();
            atts.add(header.classAttribute().copy("Actual"));
            data = new Instances(header.relationName() + "-" + (iteration + 1), atts, size);
            data.setClassIndex(0);
            for (i = 0; i < subset.size(); i++) {
                inst = new DenseInstance(preds.get(subset.get(i)).weight(),
                        new double[] { preds.get(subset.get(i)).actual() });
                data.add(inst);
                switch (m_ErrorCalculation) {
                case ACTUAL_MINUS_PREDICTED:
                    errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted();
                    break;
                case PREDICTED_MINUS_ACTUAL:
                    errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual();
                    break;
                case ABSOLUTE:
                    errors[i] = Math
                            .abs(preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted());
                    break;
                case BOTH:
                    errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted();
                    errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual();
                    break;
                default:
                    throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation);
                }
            }

            // perform "fake" evaluation
            try {
                eval = new Evaluation(data);
                for (i = 0; i < subset.size(); i++) {
                    if (numeric)
                        eval.evaluateModelOnceAndRecordPrediction(
                                new double[] { preds.get(subset.get(i)).predicted() }, data.instance(i));
                    else
                        eval.evaluateModelOnceAndRecordPrediction(
                                ((NominalPrediction) preds.get(subset.get(i))).distribution().clone(),
                                data.instance(i));
                }
            } catch (Exception e) {
                result = handleException(
                        "Failed to create 'fake' Evaluation object (iteration: " + (iteration + 1) + ")!", e);
                break;
            }

            // add row
            row = sheet.addRow();
            row.addCell("S").setContent(iteration + 1);
            for (EvaluationStatistic s : m_StatisticValues) {
                try {
                    row.addCell(s.toString()).setContent(EvaluationHelper.getValue(eval, s, classIndex));
                } catch (Exception e) {
                    getLogger().log(Level.SEVERE,
                            "Failed to calculate statistic in iteration #" + (iteration + 1) + ": " + s, e);
                    row.addCell(s.toString()).setMissing();
                }
            }
            for (i = 0; i < m_Percentiles.length; i++) {
                perc = new Percentile<>();
                perc.addAll(errors);
                percRev = new Percentile<>();
                percRev.addAll(errorsRev);
                switch (m_ErrorCalculation) {
                case ACTUAL_MINUS_PREDICTED:
                    row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue()));
                    break;
                case PREDICTED_MINUS_ACTUAL:
                    row.addCell("perc-PmA-" + i)
                            .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue()));
                    break;
                case ABSOLUTE:
                    row.addCell("perc-Abs-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue()));
                    break;
                case BOTH:
                    row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue()));
                    row.addCell("perc-PmA-" + i)
                            .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue()));
                    break;
                default:
                    throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation);
                }
            }
        }

        if ((result == null) && (sheet != null))
            m_OutputToken = new Token(sheet);
    }

    return result;
}

From source file:adams.flow.transformer.WekaExtractArray.java

License:Open Source License

/**
 * Executes the flow item./*from   w ww .  j a va2  s.c  om*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Double[] array;
    Instances inst;
    SpreadSheet sheet;
    int i;
    int index;
    Cell cell;

    result = null;

    array = null;
    if (m_InputToken.getPayload() instanceof Instances) {
        inst = (Instances) m_InputToken.getPayload();

        if (m_Type == ExtractionType.COLUMN)
            m_Index.setMax(inst.numAttributes());
        else
            m_Index.setMax(inst.numInstances());
        index = m_Index.getIntIndex();

        if (index == -1)
            result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
        else if ((m_Type == ExtractionType.COLUMN) && !inst.attribute(index).isNumeric())
            result = "Column " + m_Index + " is not numeric!";

        if (result == null) {
            if (m_Type == ExtractionType.COLUMN) {
                array = new Double[inst.numInstances()];
                for (i = 0; i < array.length; i++)
                    array[i] = inst.instance(i).value(index);
            } else {
                array = new Double[inst.numAttributes()];
                for (i = 0; i < array.length; i++)
                    array[i] = inst.instance(index).value(i);
            }
        }
    } else {
        sheet = (SpreadSheet) m_InputToken.getPayload();

        if (m_Type == ExtractionType.COLUMN)
            m_Index.setMax(sheet.getColumnCount());
        else
            m_Index.setMax(sheet.getRowCount());
        index = m_Index.getIntIndex();

        if (index == -1)
            result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
        else if ((m_Type == ExtractionType.COLUMN) && !sheet.isNumeric(index, true))
            result = "Column " + m_Index + " is not numeric!";

        if (result == null) {
            if (m_Type == ExtractionType.COLUMN) {
                array = new Double[sheet.getRowCount()];
                for (i = 0; i < array.length; i++) {
                    cell = sheet.getCell(i, index);
                    if ((cell != null) && !cell.isMissing())
                        array[i] = cell.toDouble();
                }
            } else {
                array = new Double[sheet.getColumnCount()];
                for (i = 0; i < array.length; i++) {
                    cell = sheet.getCell(index, i);
                    if ((cell != null) && !cell.isMissing())
                        array[i] = cell.toDouble();
                }
            }
        }
    }

    if (array != null)
        m_OutputToken = new Token(array);

    return result;
}

From source file:adams.flow.transformer.WekaFilter.java

License:Open Source License

/**
 * Executes the flow item.
 *
 * Accepts a weka.core.Instance, an adams.data.instance.Instance or a
 * weka.core.Instances payload. If the filter still needs initializing (or
 * is re-initialized on every token), a dataset is used for that, built from
 * the single instance if necessary, and the data is filtered in batch mode.
 * Otherwise a single instance is pushed through the already initialized
 * filter. The output token mirrors the type of the input payload.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    weka.core.Instances data;
    weka.core.Instances filteredData;
    weka.core.Instance inst;
    adams.data.instance.Instance instA;
    weka.core.Instance filteredInst;
    String relation;

    result = null;

    data = null;
    inst = null;
    if (m_InputToken.hasPayload(weka.core.Instance.class))
        inst = m_InputToken.getPayload(weka.core.Instance.class);
    else if (m_InputToken.hasPayload(adams.data.instance.Instance.class))
        inst = m_InputToken.getPayload(adams.data.instance.Instance.class).toInstance();
    else if (m_InputToken.hasPayload(weka.core.Instances.class))
        data = m_InputToken.getPayload(weka.core.Instances.class);
    else
        result = m_InputToken.unhandledData();

    if (result == null) {
        try {
            // initialize filter?
            if (!m_Initialized || !m_InitializeOnce) {
                if (data == null) {
                    // wrap the single instance in a dataset sharing its header
                    data = new weka.core.Instances(inst.dataset(), 0);
                    data.add(inst);
                }
                initActualFilter(data);
            }

            synchronized (m_ActualFilter) {
                if (!m_FlowContextUpdated) {
                    m_FlowContextUpdated = true;
                    if (m_ActualFilter instanceof FlowContextHandler)
                        ((FlowContextHandler) m_ActualFilter).setFlowContext(this);
                }

                // filter data
                filteredData = null;
                filteredInst = null;
                if (data != null) {
                    relation = data.relationName();
                    filteredData = weka.filters.Filter.useFilter(data, m_ActualFilter);
                    if (m_KeepRelationName) {
                        filteredData.setRelationName(relation);
                        if (isLoggingEnabled())
                            getLogger().info("Setting relation name: " + relation);
                    }
                    m_Initialized = true;
                } else {
                    relation = inst.dataset().relationName();
                    m_ActualFilter.input(inst);
                    m_ActualFilter.batchFinished();
                    filteredInst = m_ActualFilter.output();
                    // output() may yield null when the filter produced no instance;
                    // the token-building code below copes with that, so the relation
                    // name must not be applied to a non-existent instance
                    if (m_KeepRelationName && (filteredInst != null)) {
                        filteredInst.dataset().setRelationName(relation);
                        if (isLoggingEnabled())
                            getLogger().info("Setting relation name: " + relation);
                    }
                }
            }

            // build output token
            if (inst != null) {
                if (filteredInst != null) {
                    if (m_InputToken.getPayload() instanceof weka.core.Instance) {
                        m_OutputToken = new Token(filteredInst);
                    } else {
                        // input was an ADAMS instance, so output one as well
                        instA = new adams.data.instance.Instance();
                        instA.set(filteredInst);
                        m_OutputToken = createToken(m_InputToken.getPayload(), instA);
                    }
                } else if ((filteredData != null) && (filteredData.numInstances() > 0)) {
                    // single instance that went through the batch path
                    m_OutputToken = createToken(m_InputToken.getPayload(), filteredData.instance(0));
                }
            } else {
                m_OutputToken = createToken(m_InputToken.getPayload(), filteredData);
            }
        } catch (Exception e) {
            result = handleException("Failed to filter data: ", e);
        }
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}

From source file:adams.flow.transformer.WekaGetInstancesValue.java

License:Open Source License

/**
 * Executes the flow item.
 *
 * Looks up a single cell (row/column) of the incoming dataset and forwards
 * its value: "?" for a missing value, the numeric value for numeric
 * attributes and the string representation for date, nominal, string and
 * relational attributes.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    Instances dataset = (Instances) m_InputToken.getPayload();
    m_Column.setData(dataset);
    m_Row.setMax(dataset.numInstances());
    int col = m_Column.getIntIndex();
    int row = m_Row.getIntIndex();

    // row problems are reported before column problems
    if (row == -1)
        return "Failed to retrieve row: " + m_Row.getIndex();
    if (col == -1)
        return "Failed to retrieve column: " + m_Column.getIndex();

    String result = null;
    try {
        if (dataset.instance(row).isMissing(col)) {
            m_OutputToken = new Token("?");
        } else {
            int type = dataset.attribute(col).type();
            if (type == Attribute.NUMERIC) {
                m_OutputToken = new Token(dataset.instance(row).value(col));
            } else if ((type == Attribute.DATE) || (type == Attribute.NOMINAL)
                    || (type == Attribute.STRING) || (type == Attribute.RELATIONAL)) {
                m_OutputToken = new Token(dataset.instance(row).stringValue(col));
            } else {
                result = "Unhandled attribute type: " + dataset.attribute(col).type();
            }
        }
    } catch (Exception e) {
        result = handleException("Failed to obtain value from dataset:", e);
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstancesHistogramRanges.java

License:Open Source License

/**
 * Executes the flow item.
 *
 * Configures a histogram statistic from the actor's options, feeds it the
 * rows/columns identified by the configured locations and queues the column
 * names of the resulting spreadsheet for output.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result = null;
    SpreadSheet ranges = null;

    m_Queue.clear();

    try {
        Instances dataset = (Instances) m_InputToken.getPayload();

        ArrayHistogram histogram = new ArrayHistogram();
        histogram.setBinCalculation(m_BinCalculation);
        histogram.setNumBins(m_NumBins);
        histogram.setBinWidth(m_BinWidth);
        histogram.setNormalize(m_Normalize);
        histogram.setUseFixedMinMax(m_UseFixedMinMax);
        histogram.setManualMin(m_ManualMin);
        histogram.setManualMax(m_ManualMax);
        histogram.setDisplayRanges(true);
        histogram.setNumDecimals(m_NumDecimals);

        for (int loc = 0; loc < m_Locations.length; loc++) {
            switch (m_DataType) {
            case ROW_BY_INDEX: {
                Index rowIndex = new Index(m_Locations[loc].stringValue());
                rowIndex.setMax(dataset.numInstances());
                histogram.add(StatUtils.toNumberArray(dataset.instance(rowIndex.getIntIndex()).toDoubleArray()));
                break;
            }

            case COLUMN_BY_INDEX: {
                WekaAttributeIndex colIndex = new WekaAttributeIndex(m_Locations[loc].stringValue());
                colIndex.setData(dataset);
                histogram.add(StatUtils.toNumberArray(dataset.attributeToDoubleArray(colIndex.getIntIndex())));
                break;
            }

            case COLUMN_BY_REGEXP: {
                // only the first attribute whose name matches is added
                for (int att = 0; att < dataset.numAttributes(); att++) {
                    if (dataset.attribute(att).name().matches(m_Locations[loc].stringValue())) {
                        histogram.add(StatUtils.toNumberArray(dataset.attributeToDoubleArray(att)));
                        break;
                    }
                }
                break;
            }

            default:
                throw new IllegalStateException("Unhandled data type: " + m_DataType);
            }
        }

        ranges = histogram.calculate().toSpreadSheet();
    } catch (Exception e) {
        result = handleException("Error generating the ranges: ", e);
        ranges = null;
    }

    if (ranges != null) {
        for (int col = 0; col < ranges.getColumnCount(); col++)
            m_Queue.add(ranges.getColumnName(col));
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstancesMerge.java

License:Open Source License

/**
 * Creates a copy of the dataset in which every attribute name carries the
 * prefix generated for this dataset.
 *
 * @param inst   the data to process
 * @param index   the index of the dataset
 * @return      the processed data
 */
protected Instances prefixAttributes(Instances inst, int index) {
    String prefix = createPrefix(inst, index);

    // header: same attributes, renamed
    ArrayList<Attribute> renamed = new ArrayList<>();
    int numAtts = inst.numAttributes();
    for (int a = 0; a < numAtts; a++) {
        Attribute original = inst.attribute(a);
        renamed.add(original.copy(prefix + original.name()));
    }

    // data: relation name and class index are carried over unchanged
    Instances result = new Instances(inst.relationName(), renamed, inst.numInstances());
    result.setClassIndex(inst.classIndex());
    for (Instance row : inst)
        result.add((Instance) row.copy());

    return result;
}

From source file:adams.flow.transformer.WekaInstancesMerge.java

License:Open Source License

/**
 * Updates the IDs in the hashset with the ones stored in the ID attribute
 * of the provided dataset./*from   w w w.  j av a2  s  .c  o  m*/
 *
 * @param instIndex    the dataset index
 * @param inst   the dataset to obtain the IDs from
 * @param ids      the hashset to store the IDs in
 */
protected void updateIDs(int instIndex, Instances inst, HashSet ids) {
    Attribute att;
    int i;
    boolean numeric;
    HashSet current;
    Object id;

    att = inst.attribute(m_UniqueID);
    if (att == null)
        throw new IllegalStateException("Attribute '" + m_UniqueID + "' not found in relation '"
                + inst.relationName() + "' (#" + (instIndex + 1) + ")!");

    // determine/check type
    if (m_AttType == -1) {
        if ((att.type() == Attribute.NUMERIC) || (att.type() == Attribute.STRING))
            m_AttType = att.type();
        else
            throw new IllegalStateException("Attribute '" + m_UniqueID + "' must be either NUMERIC or STRING (#"
                    + (instIndex + 1) + ")!");
    } else {
        if (m_AttType != att.type())
            throw new IllegalStateException("Attribute '" + m_UniqueID
                    + "' must have same attribute type in all the datasets (#" + (instIndex + 1) + ")!");
    }

    // get IDs
    numeric = m_AttType == Attribute.NUMERIC;
    current = new HashSet();
    for (i = 0; i < inst.numInstances(); i++) {
        if (numeric)
            id = inst.instance(i).value(att);
        else
            id = inst.instance(i).stringValue(att);
        if (m_Strict && current.contains(id))
            throw new IllegalStateException(
                    "ID '" + id + "' is not unique in dataset #" + (instIndex + 1) + "!");
        current.add(id);
    }
    ids.addAll(current);
}

From source file:adams.flow.transformer.WekaInstancesMerge.java

License:Open Source License

/**
 * Merges the datasets based on the collected IDs.
 *
 * The merged header is the concatenation of the attributes of all processed
 * datasets. One row is created per collected ID (initially filled with
 * missing values) and the rows of each dataset are written into the row
 * that corresponds to the position of their ID in the sorted list of IDs.
 * If removal is enabled (see getRemove()), rows that did not receive exactly
 * as many source rows as there are datasets get dropped.
 *
 * @param orig   the original datasets
 * @param inst   the processed datasets to merge into one
 * @param ids      the IDs for identifying the rows
 * @return      the merged dataset, or null if the actor got stopped
 * @throws IllegalStateException   if the ID of a row cannot be located among
 *          the collected IDs or an attribute type is not handled
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
    Instances result;
    ArrayList<Attribute> atts;
    int i;
    int n;
    int m;
    int index;
    String relation;
    List sortedIDs;
    Attribute att;
    int[] indexStart;
    double value;
    double[] values;
    HashMap<Integer, Integer> hashmap;
    HashSet<Instance> hs;

    // create header
    if (isLoggingEnabled())
        getLogger().info("Creating merged header...");
    atts = new ArrayList<>();
    relation = "";
    // indexStart[i]: position of dataset i's first attribute in the merged header
    indexStart = new int[inst.length];
    for (i = 0; i < inst.length; i++) {
        indexStart[i] = atts.size();
        for (n = 0; n < inst[i].numAttributes(); n++)
            atts.add((Attribute) inst[i].attribute(n).copy());
        // assemble relation name (relation names joined by "_")
        if (i > 0)
            relation += "_";
        relation += inst[i].relationName();
    }
    result = new Instances(relation, atts, ids.size());

    // fill with missing values
    if (isLoggingEnabled())
        getLogger().info("Filling with missing values...");
    for (i = 0; i < ids.size(); i++) {
        if (isStopped())
            return null;
        // progress
        if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
            getLogger().info("" + (i + 1));
        result.add(new DenseInstance(result.numAttributes()));
    }

    // sort IDs (the position in the sorted list is used as row index below)
    if (isLoggingEnabled())
        getLogger().info("Sorting indices...");
    sortedIDs = new ArrayList(ids);
    Collections.sort(sortedIDs);

    // generate rows
    // hashmap: merged row index -> number of source rows written into that row
    hashmap = new HashMap<>();
    for (i = 0; i < inst.length; i++) {
        if (isStopped())
            return null;
        if (isLoggingEnabled())
            getLogger().info("Adding file #" + (i + 1));
        // NOTE(review): the ID attribute is looked up in the original dataset but
        // used to read values from the processed one - presumably both share the
        // attribute's position; verify against the caller
        att = orig[i].attribute(m_UniqueID);
        for (n = 0; n < inst[i].numInstances(); n++) {
            // progress
            if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
                getLogger().info("" + (n + 1));

            // determine index of row
            if (m_AttType == Attribute.NUMERIC)
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
            else
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
            // a negative result means the ID is not among the collected IDs
            if (index < 0)
                throw new IllegalStateException(
                        "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");

            // count this source row for the merged row
            if (!hashmap.containsKey(index))
                hashmap.put(index, 0);
            hashmap.put(index, hashmap.get(index) + 1);

            // use internal representation for faster access
            values = result.instance(index).toDoubleArray();

            // add attribute values
            for (m = 0; m < inst[i].numAttributes(); m++) {
                // missing value? then leave the merged cell as it is
                if (inst[i].instance(n).isMissing(m))
                    continue;

                switch (inst[i].attribute(m).type()) {
                case Attribute.NUMERIC:
                case Attribute.DATE:
                case Attribute.NOMINAL:
                    // internal value can be transferred as is
                    values[indexStart[i] + m] = inst[i].instance(n).value(m);
                    break;

                case Attribute.STRING:
                    // string has to be registered with the merged attribute first
                    value = result.attribute(indexStart[i] + m)
                            .addStringValue(inst[i].instance(n).stringValue(m));
                    values[indexStart[i] + m] = value;
                    break;

                case Attribute.RELATIONAL:
                    // relation has to be registered with the merged attribute first
                    value = result.attribute(indexStart[i] + m)
                            .addRelation(inst[i].instance(n).relationalValue(m));
                    values[indexStart[i] + m] = value;
                    break;

                default:
                    throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type());
                }
            }

            // update row (replaces the merged row with one built from the updated values)
            result.set(index, new DenseInstance(1.0, values));
        }
    }

    // optionally drop rows whose source row count differs from the number of datasets
    if (getRemove()) {
        hs = new HashSet<>();
        for (Integer x : hashmap.keySet()) {
            if (hashmap.get(x) != inst.length)
                hs.add(result.get(x));
        }
        result.removeAll(hs);
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstancesStatistic.java

License:Open Source License

/**
 * Executes the flow item./*from   ww  w  . j a va 2 s  .c om*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    SpreadSheet sheet;
    Instances data;
    int i;
    int n;
    Index index;
    AbstractArrayStatistic stat;

    result = null;

    try {
        sheet = null;
        data = (Instances) m_InputToken.getPayload();
        stat = m_Statistic.shallowCopy(true);

        for (i = 0; i < m_Locations.length; i++) {
            switch (m_DataType) {
            case ROW_BY_INDEX:
                index = new Index(m_Locations[i].stringValue());
                index.setMax(data.numInstances());
                stat.add(StatUtils.toNumberArray(data.instance(index.getIntIndex()).toDoubleArray()));
                break;

            case COLUMN_BY_INDEX:
                index = new WekaAttributeIndex(m_Locations[i].stringValue());
                ((WekaAttributeIndex) index).setData(data);
                stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(index.getIntIndex())));
                break;

            case COLUMN_BY_REGEXP:
                for (n = 0; n < data.numAttributes(); n++) {
                    if (data.attribute(n).name().matches(m_Locations[i].stringValue())) {
                        stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n)));
                        break;
                    }
                }
                break;

            default:
                throw new IllegalStateException("Unhandled data type: " + m_DataType);
            }
        }

        sheet = stat.calculate().toSpreadSheet();
    } catch (Exception e) {
        result = handleException("Error generating the statistic: ", e);
        sheet = null;
    }

    if (sheet != null)
        m_OutputToken = new Token(sheet);

    return result;
}