Example usage for weka.core Instance value

List of usage examples for weka.core Instance value

Introduction

On this page you can find example usages of the weka.core Instance value method.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:LogReg.Logistic.java

License:Open Source License

/**
 * Computes the distribution for a given instance
 *
 * @param instance the instance for which distribution is computed
 * @return the distribution/*from ww w .j  av  a 2s. c  o m*/
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

    m_ReplaceMissingValues.input(instance);
    instance = m_ReplaceMissingValues.output();
    m_AttFilter.input(instance);
    instance = m_AttFilter.output();
    m_NominalToBinary.input(instance);
    instance = m_NominalToBinary.output();

    // Extract the predictor columns into an array
    double[] instDat = new double[m_NumPredictors + 1];
    int j = 1;
    instDat[0] = 1;
    for (int k = 0; k <= m_NumPredictors; k++) {
        if (k != m_ClassIndex) {
            instDat[j++] = instance.value(k);
        }
    }

    double[] distribution = evaluateProbability(instDat);
    return distribution;
}

From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java

License:Open Source License

/**
 * Formats an instance as HTML, one {@code name = <b>value</b><br/>} entry per attribute.
 * <p>
 * Nominal and string attributes are written through {@code stringValue} (HTML-escaped),
 * date attributes are formatted with {@code FormatterUtil.DATE_FORMAT}, and other numeric
 * attributes are written as their raw double value. Any other attribute type yields an
 * empty bold element.
 *
 * @param inst the instance to format
 * @param withHTMLHeader whether to wrap the output in {@code <html><body>...</body></html>}
 * @return the HTML text
 */
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) {
    final StringBuilder html = new StringBuilder();
    if (withHTMLHeader) {
        html.append("<html><body>");
    }

    final int attributeCount = inst.numAttributes();
    for (int idx = 0; idx < attributeCount; idx++) {
        final String escapedName = StringEscapeUtils.escapeHtml(inst.attribute(idx).name());
        html.append(escapedName).append(" = ").append("<b>");

        // The date test must come before the numeric test, exactly as in the original ordering.
        if (inst.attribute(idx).isNominal() || inst.attribute(idx).isString()) {
            html.append(StringEscapeUtils.escapeHtml(inst.stringValue(idx)));
        } else if (inst.attribute(idx).isDate()) {
            final Calendar calendar = Calendar.getInstance();
            calendar.setTimeInMillis((long) inst.value(idx));
            html.append(FormatterUtil.DATE_FORMAT.format(calendar.getTime()));
        } else if (inst.attribute(idx).isNumeric()) {
            html.append(inst.value(idx));
        }

        html.append("</b>").append("<br/>");
    }

    if (withHTMLHeader) {
        html.append("</body></html>");
    }
    return html.toString();
}

From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java

License:Open Source License

/**
 * Adds one {@code TimeSeries} per numeric attribute of the dataset to the given collection.
 * <p>
 * The time of each point is read from the attribute at {@code dateIdx} (interpreted as
 * milliseconds). Attributes whose values are all missing are skipped with a console message;
 * a missing value inside a series is stored as {@code null}.
 *
 * @param dataSet the source data
 * @param dateIdx index of the attribute holding the timestamp
 * @param tsDataset the collection receiving the non-empty series
 */
private static void fillWithSingleAxis(final Instances dataSet, final int dateIdx,
        final TimeSeriesCollection tsDataset) {
    final int rowCount = dataSet.numInstances();
    final Calendar calendar = Calendar.getInstance();

    for (final Integer attrIdx : WekaDataStatsUtil.getNumericAttributesIndexes(dataSet)) {
        final boolean onlyMissing = dataSet.attributeStats(attrIdx).missingCount == dataSet.numInstances();
        if (onlyMissing) {
            System.out.println("TimeSeriesChartUtil: Only missing values for '" + dataSet.attribute(attrIdx).name()
                    + "', so skip it!");
            continue;
        }

        final TimeSeries series = new TimeSeries(dataSet.attribute(attrIdx).name());
        for (int row = 0; row < rowCount; row++) {
            final Instance current = dataSet.instance(row);
            calendar.setTimeInMillis((long) current.value(dateIdx));
            final Millisecond period = new Millisecond(calendar.getTime());

            // Kept as two separate calls: the null literal and the double select different overloads.
            if (current.isMissing(attrIdx)) {
                series.addOrUpdate(period, null);
            } else {
                series.addOrUpdate(period, current.value(attrIdx));
            }
        }

        if (!series.isEmpty()) {
            tsDataset.addSeries(series);
        }
    }
}

From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java

License:Open Source License

/**
 * Adds one {@code YIntervalSeries} per numeric attribute of the dataset to the given collection.
 * <p>
 * Missing values are not added. For the attribute at {@code deviatedAttrIdx}, every row except
 * the first and the last gets the interval {@code [value - deviation, value + deviation]} (and a
 * trace line on the console); all other points get a zero-width interval.
 *
 * @param dataSet the source data
 * @param dateIdx index of the attribute holding the timestamp (used as the x value)
 * @param tsDataset the collection receiving the non-empty series
 * @param deviation half-width of the interval drawn around the deviated attribute
 * @param deviatedAttrIdx index of the attribute that receives the interval
 */
private static void fillWithSingleAxisInterval(final Instances dataSet, final int dateIdx,
        final YIntervalSeriesCollection tsDataset, final double deviation, final int deviatedAttrIdx) {
    final int rowCount = dataSet.numInstances();
    final int lastRow = rowCount - 1;

    for (final Integer attrIdx : WekaDataStatsUtil.getNumericAttributesIndexes(dataSet)) {
        final boolean onlyMissing = dataSet.attributeStats(attrIdx).missingCount == dataSet.numInstances();
        if (onlyMissing) {
            System.out.println("TimeSeriesChartUtil: Only missing values for '" + dataSet.attribute(attrIdx).name()
                    + "', so skip it!");
            continue;
        }

        final YIntervalSeries series = new YIntervalSeries(dataSet.attribute(attrIdx).name());
        for (int row = 0; row < rowCount; row++) {
            final Instance current = dataSet.instance(row);
            final long timeInMilliSec = (long) current.value(dateIdx);

            if (current.isMissing(attrIdx)) {
                // Missing readings are simply left out of the series.
                continue;
            }

            final double y = current.value(attrIdx);
            final boolean withDeviation = attrIdx == deviatedAttrIdx && row > 0 && row < lastRow;
            if (withDeviation) {
                System.out.println(rowCount + " " + row + " " + y + " "
                        + (y - deviation) + " " + (y + deviation));
                series.add(timeInMilliSec, y, y - deviation, y + deviation);
            } else {
                series.add(timeInMilliSec, y, y, y);
            }
        }

        if (!series.isEmpty()) {
            tsDataset.addSeries(series);
        }
    }
}

From source file:lu.lippmann.cdb.ext.hydviga.ui.GapFillingFrame.java

License:Open Source License

/**
 * Builds a new {@code Instances} from the {@code dataSet} field and overwrites the {@code attr}
 * value of every row whose timestamp matches a non-missing row of {@code diff}.
 * <p>
 * In {@code diff}, attribute 0 carries the replacement value and the last attribute carries the
 * timestamp. Only the first row of the new dataset with a matching timestamp (read at
 * {@code dateIdx}) is updated.
 *
 * @param diff the rows carrying replacement values
 * @return the corrected dataset
 */
private Instances buildCorrectedDataset(final Instances diff) {
    final Instances corrected = new Instances(dataSet);
    final int correctedCount = corrected.numInstances();

    final int diffCount = diff.numInstances();
    final int timestampIdx = diff.numAttributes() - 1;
    final int valueIdx = 0;

    for (int d = 0; d < diffCount; d++) {
        final Instance diffRow = diff.instance(d);
        if (!diffRow.isMissing(valueIdx)) {
            final long timestamp = (long) diffRow.value(timestampIdx);

            // Linear scan for the first row carrying the same timestamp.
            int h = 0;
            while (h < correctedCount && (long) corrected.instance(h).value(dateIdx) != timestamp) {
                h++;
            }
            if (h < correctedCount) {
                corrected.instance(h).setValue(attr, diffRow.value(valueIdx));
            }
        }
    }

    return corrected;
}

From source file:lu.lippmann.cdb.ext.hydviga.ui.GapsTabView.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
public void update0(final Instances dataSet) throws Exception {
    // Rebuilds the date-attribute selector and the panel of "fictive gap" / knowledge-DB
    // actions for the given dataset. Throws if the dataset has no date attribute.

    // Remove the selector left over from a previous call, if there is one.
    if (this.dateAttributeField != null) {
        this.jxp.remove(this.dateAttributeField);
        this.dateAttributeField = null;
        this.jxp.updateUI();
    }

    final java.util.List<String> dateAttributeNames = WekaDataStatsUtil.getDateAttributeNames(dataSet);
    final boolean hasDateAttributes = (!dateAttributeNames.isEmpty())
    /*&&(WekaDataStatsUtil.getNumericAttributesIndexes(dataSet).size()>0)*/;

    if (hasDateAttributes) {
        // Combo box listing the date attributes; changing the selection refills the tabs.
        this.dateAttributeField = new JXComboBox(dateAttributeNames.toArray());
        this.dateAttributeField.setBorder(new TitledBorder("Date attribute"));
        this.jxp.add(this.dateAttributeField, BorderLayout.SOUTH);
        this.dateAttributeField.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                fillTabs(dataSet);
            }
        });

        // Initial fill of the tabs, run through an AbstractSimpleAsync task
        // (presumably off the calling thread - confirm). A failure is only printed.
        new AbstractSimpleAsync<Void>(true) {
            @Override
            public Void execute() throws Exception {
                fillTabs(dataSet);
                return null;
            }

            @Override
            public void onSuccess(Void result) {
            }

            @Override
            public void onFailure(Throwable caught) {
                caught.printStackTrace();
            }
        }.start();

        // Rebuild the action panel from scratch: series, gap size and gap position selectors.
        this.actionsForFictiveGapsPanel.removeAll();
        //final JComboBox seriesWithoutGapCB=new JComboBox(WekaTimeSeriesUtil.getNamesOfAttributesWithoutGap(dataSet).toArray());
        final JComboBox seriesWithoutGapCB = new JComboBox(
                WekaDataStatsUtil.getAttributeNames(dataSet).toArray());
        seriesWithoutGapCB.setBorder(new TitledBorder("Fictive gap in"));
        this.actionsForFictiveGapsPanel.add(seriesWithoutGapCB);
        final JComboBox sizeGapCB = new JComboBox(new Object[] { 10, 50, 100, 200, 400, 500 });
        sizeGapCB.setBorder(new TitledBorder("Size of the fictive gap"));
        this.actionsForFictiveGapsPanel.add(sizeGapCB);
        // Position choices are labelled "0/PARTS_COUNT" .. "(PARTS_COUNT-1)/PARTS_COUNT".
        final Object[] partChoice = new Object[PARTS_COUNT];
        for (int iii = 0; iii < PARTS_COUNT; iii++) {
            partChoice[iii] = iii + "/" + PARTS_COUNT;
        }

        final JComboBox positionGapCB = new JComboBox(partChoice);
        positionGapCB.setBorder(new TitledBorder("Position of the fictive gap"));
        this.actionsForFictiveGapsPanel.add(positionGapCB);

        this.fictiveGapButton = new JXButton("Create a fictive gap");
        this.actionsForFictiveGapsPanel.add(this.fictiveGapButton);

        this.fictiveGapButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                /* create a fake gap */
                final Attribute attr = dataSet.attribute(seriesWithoutGapCB.getSelectedItem().toString());
                final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet);
                // NOTE(review): the labels start at 0/PARTS_COUNT but the position uses
                // selectedIndex + 1, so the last choice maps to numInstances - confirm this is intended.
                final int position = ((positionGapCB.getSelectedIndex() + 1) * dataSet.numInstances())
                        / PARTS_COUNT;
                final int gapsize = Integer.valueOf(sizeGapCB.getSelectedItem().toString());

                /* show it */
                // The frame receives a new Instances built from dataSet rather than dataSet itself.
                final GapFillingFrame jxf = new GapFillingFrame(getAbstractTabView(), new Instances(dataSet),
                        attr, dateIdx, GapsUtil.getCountOfValuesBeforeAndAfter(gapsize), position, gapsize, gcp,
                        false);
                //jxf.setSize(new Dimension(900,700));
                //jxf.setExtendedState(Frame.MAXIMIZED_BOTH);
                jxf.setLocationRelativeTo(jxp);
                jxf.setVisible(true);
                //jxf.setResizable(false);
            }
        });

        // "Show KDB": opens the knowledge DB in a DatasetView; errors are only printed.
        this.showKnowledgeDBButton = new JXButton("Show KDB");
        this.actionsForFictiveGapsPanel.add(this.showKnowledgeDBButton);
        this.showKnowledgeDBButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {

                try {
                    final DatasetView view = new DatasetView("KnowledgeDB", eventPublisher, commandDispatcher,
                            applicationContext);
                    view.setDataSet(GapFillingKnowledgeDB.getKnowledgeDB()).setAsVisible(true);
                } catch (final Exception ee) {
                    ee.printStackTrace();
                }
            }
        });

        // "Inspect KDB": opens the explorer frame on the first date attribute of the dataset.
        this.inspectKnowledgeDBButton = new JXButton("Inspect KDB");
        this.actionsForFictiveGapsPanel.add(this.inspectKnowledgeDBButton);
        this.inspectKnowledgeDBButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                try {
                    final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet);
                    new GapFillingKnowledgeDBExplorerFrame(dataSet, dateIdx, gcp);
                } catch (final Exception ee) {
                    ee.printStackTrace();
                }
            }
        });

        // "Show KDB with true cases": builds an ARFF document in memory that merges the gaps
        // found in dataSet ("true cases") with the rows of the knowledge DB, then displays it.
        this.showKnowledgeDBWithTrueCasesButton = new JXButton("Show KDB with true cases");
        this.actionsForFictiveGapsPanel.add(this.showKnowledgeDBWithTrueCasesButton);
        this.showKnowledgeDBWithTrueCasesButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                try {
                    // ARFF header: 25 attributes; both row writers below must emit 25 fields.
                    final StringBuilder sb = new StringBuilder("@relation blabla\n");
                    sb.append("@attribute serieName string\n");
                    sb.append("@attribute serieX numeric\n");
                    sb.append("@attribute serieY numeric\n");
                    sb.append("@attribute year numeric\n");
                    sb.append("@attribute season {Winter,Spring,Summer,Autumn}\n");
                    sb.append("@attribute gapSize numeric\n");
                    sb.append("@attribute gapPosition numeric\n");
                    sb.append("@attribute isDuringRising {true,false}\n");
                    sb.append("@attribute flow string\n");
                    sb.append("@attribute hasDownstream {false,true}\n");
                    sb.append("@attribute hasUpstream {false,true}\n");
                    sb.append("@attribute isReal {false,true}\n");
                    sb.append(
                            "@attribute algo {Interpolation,EM,REG,REPTREE,M5P,ZeroR,ANN,NEARESTNEIGHBOUR}\n");
                    sb.append("@attribute useDiscretizedTime {false,true}\n");
                    sb.append("@attribute useMostSimilar {false,true}\n");
                    sb.append("@attribute useNearest {true,false}\n");
                    sb.append("@attribute useDownstream {false,true}\n");
                    sb.append("@attribute useUpstream {true,false}\n");
                    sb.append("@attribute mae numeric\n");
                    sb.append("@attribute rmse numeric\n");
                    sb.append("@attribute rsr numeric\n");
                    sb.append("@attribute pbias numeric\n");
                    sb.append("@attribute ns numeric\n");
                    sb.append("@attribute ioa numeric\n");
                    sb.append("@attribute wasTheBestSolution {true,false}\n");

                    sb.append("@data\n");

                    /* true cases */
                    final Calendar cal = Calendar.getInstance();
                    final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet);
                    final Instances gapsDescriptionsDataset = GapsUtil.buildGapsDescription(gcp, dataSet,
                            dateIdx);
                    final int gddc = gapsDescriptionsDataset.numInstances();
                    for (int i = 0; i < gddc; i++) {
                        final Instance trueCase = gapsDescriptionsDataset.instance(i);
                        sb.append(trueCase.stringValue(0)); // serie
                        sb.append(",");
                        sb.append(gcp.getCoordinates(trueCase.stringValue(0))[0]); // x
                        sb.append(",");
                        sb.append(gcp.getCoordinates(trueCase.stringValue(0))[1]); // y
                        sb.append(",");
                        cal.setTime(FormatterUtil.DATE_FORMAT.parse(trueCase.stringValue(1))); // year
                        sb.append(cal.get(Calendar.YEAR));
                        sb.append(",");
                        sb.append(trueCase.stringValue(2).split("/")[0]); // season
                        sb.append(",");
                        sb.append(trueCase.value(4)); //gapsize
                        sb.append(",");
                        sb.append(trueCase.value(5)); //gap position
                        sb.append(",");
                        sb.append(trueCase.stringValue(10).equals("true")); //rising
                        sb.append(",");
                        sb.append(trueCase.stringValue(11)); // flow
                        sb.append(",");
                        sb.append(!trueCase.stringValue(9).equals("n/a")); //downstream
                        sb.append(",");
                        sb.append(!trueCase.stringValue(8).equals("n/a")); // upstream
                        sb.append(",");
                        sb.append("true"); // isReal: always true for a true case
                        // The remaining 13 fields (algo .. wasTheBestSolution) are written as "?".
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append("\n");
                    }

                    /* the simulated cases from the knowledge DB */
                    // Knowledge-DB columns are re-ordered here to match the header written above.
                    final Instances knowledgeDB = GapFillingKnowledgeDB.getKnowledgeDB();
                    final int kni = knowledgeDB.numInstances();
                    for (int i = 0; i < kni; i++) {
                        final Instance simulatedCase = knowledgeDB.instance(i);
                        sb.append(simulatedCase.stringValue(0)); // name
                        sb.append(",");
                        sb.append(simulatedCase.value(1)); //x
                        sb.append(",");
                        sb.append(simulatedCase.value(2)); //y
                        sb.append(",");
                        sb.append(simulatedCase.value(6)); //year
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(5)); //season
                        sb.append(",");
                        sb.append(simulatedCase.value(3)); // size
                        sb.append(",");
                        sb.append(simulatedCase.value(4)); // position
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(7)); // rising
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(8)); //flow
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(9)); //downstream
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(10)); // upstream
                        sb.append(",");
                        sb.append("false"); // real
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(11)); //algo
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(12)); // discr time
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(13)); // most similar
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(14)); // nearest
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(15)); //downstream
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(16)); //upstream
                        sb.append(",");
                        sb.append(simulatedCase.value(17)); //mae
                        sb.append(",");
                        sb.append(simulatedCase.value(18)); //rmse
                        sb.append(",");
                        sb.append(simulatedCase.value(19)); //rsr
                        sb.append(",");
                        sb.append(simulatedCase.value(20)); //pbias
                        sb.append(",");
                        sb.append(simulatedCase.value(21)); //ns
                        sb.append(",");
                        sb.append(simulatedCase.value(22)); //ioa
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(23)); // best
                        sb.append("\n");
                    }

                    //System.out.println(sb.toString());

                    // Parse the generated ARFF text and show the merged dataset.
                    final Instances newds = WekaDataAccessUtil.loadInstancesFromARFFString(sb.toString(), false,
                            false);
                    final DatasetView view = new DatasetView("KnowledgeDB with true cases", eventPublisher,
                            commandDispatcher, applicationContext);
                    view.setDataSet(newds).setAsVisible(true);
                } catch (final Exception ee) {
                    ee.printStackTrace();
                }
            }
        });

        // "Rebuild KDB": delegates to rebuildKnowledgeDB with the current dataset.
        this.rebuildKnowledgeDBButton = new JXButton("Rebuild KDB");
        this.actionsForFictiveGapsPanel.add(this.rebuildKnowledgeDBButton);
        this.rebuildKnowledgeDBButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                rebuildKnowledgeDB(dataSet);
            }
        });
    } else {
        // This view cannot be built without at least one date attribute.
        throw new Exception("No date attributes in the dataset.");
    }
}

From source file:lu.lippmann.cdb.ext.hydviga.util.TransformTimeSeries.java

License:Open Source License

/**
 * Main method./*www .  j av  a 2  s .c  o  m*/
 * @param args command line arguments
 */
public static final void main(final String[] args) {
    try {
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File("."
                + File.separatorChar + "data_fake" + File.separatorChar + "all_valid_q_series_complete2.arff"));
        System.out.println(dataSet.toSummaryString());

        final int numAttributes = dataSet.numAttributes();
        final int numInstances = dataSet.numInstances();
        for (int i = 0; i < numAttributes; i++) {
            final int i_bis = (int) (Math.random() * (double) (numAttributes - 3));
            final int i_tri = (int) (Math.random() * (double) (numAttributes - 3));

            for (int j = 0; j < numInstances; j++) {
                final Instance instance_j = dataSet.instance(j);

                if (instance_j.isMissing(i))
                    continue;
                if (instance_j.isMissing(i_bis))
                    continue;
                if (instance_j.isMissing(i_tri))
                    continue;

                final double iValue = instance_j.value(i);
                final double iBisValue = instance_j.value(i_bis);
                final double iTriValue = instance_j.value(i_tri);

                instance_j.setValue(i, (iValue + iBisValue + iTriValue));
            }
        }

        WekaDataAccessUtil.saveInstancesIntoARFFFile(dataSet, new File("." + File.separatorChar + "data_fake"
                + File.separatorChar + "all_valid_q_series_complete2_fake.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * /*from  w w w.j a  v a2 s .com*/
 * @return
 * @throws Exception 
 */
private void computeMatrixMDF() throws Exception {
    //Build filtered instance for each element of the base
    int baseSize = base.size();
    Attribute baseAttribute = instances.attribute(baseIndex);

    //Create baseSize copy of instances for filtering
    List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < baseSize; i++) {
        filteredInstances.add(new Instances(instances));
    }

    //Filter
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        for (final TupleSI j : base) {
            final int wekaAttributeValue = (int) instance.value(baseIndex);
            if (!new TupleSI(baseAttribute.value(wekaAttributeValue), baseIndex).equals(j)) {
                WekaUtil2.removeFromInstances(filteredInstances.get(base.indexOf(j)), instance);
            }
        }
    }

    //Compute I vector
    final int idxsNs = idxsN.size();

    if (idxsNs == 0) {
        throw new Exception("You need at least one numerical attribute !!");
    }

    int minIndexForI = -1;
    double minValueForI = Double.MAX_VALUE;
    final double[][] meanBase = new double[idxsNs][baseSize];
    int p = 0;
    for (final Integer num : idxsN) {
        double Ip = 0.0;
        for (int j = 0; j < baseSize; j++) {
            final List<Instance> filtredInstance = filteredInstances.get(j);
            final int fs = filtredInstance.size();
            double mean = 0;
            for (int l = 0; l < fs; l++) {
                mean += filtredInstance.get(l).value(num);
            }
            mean = mean / fs;
            meanBase[p][j] = mean;
            for (int l = 0; l < fs; l++) {
                Ip += Math.pow(filtredInstance.get(l).value(num) - mean, 2);
            }
        }
        if (Ip < minValueForI) {
            minValueForI = Ip;
            minIndexForI = p;
        }
        p++;
    }

    this.maxNum = new double[idxsNs];
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        //Save maximum value for each numerical attribute
        for (Integer n1 : idxsN) {
            double val = instance.value(n1);
            int idx = idxsN.indexOf(n1);
            if (val > maxNum[idx]) {
                maxNum[idx] = val;
            }
        }
        //Compute matrix M for each categorical attribute
        for (final Integer e1 : idxsC) {
            for (final Integer e2 : idxsC) {
                final int i1 = getIndexOf(e1, instance.attribute(e1).value((int) instance.value(e1)));
                final int j1 = getIndexOf(e2, instance.attribute(e2).value((int) instance.value(e2)));
                M[i1][j1] = M[i1][j1] + 1;
            }
        }
    }

    //Compute D matrix
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            double d = M[i][j] / (M[i][i] + M[j][j] - M[i][j] + 0.0);
            if (d >= theta) {
                D[i][j] = d;
            } else {
                D[i][j] = 0;
            }
        }
    }

    //Compute F matrix for base
    for (final TupleSI baseVal : base) {
        F.put(baseVal, meanBase[minIndexForI][base.indexOf(baseVal)]);
    }

    //Compute F matrix for noBase
    for (final TupleSI noBaseVal : noBase) {
        double f = 0.0;
        for (final TupleSI baseVal : base) {
            f += D[getIndexOf(noBaseVal)][getIndexOf(baseVal)] * F.get(baseVal);
        }
        F.put(noBaseVal, f);
    }

}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * /*  w  w w  . j ava  2  s  .co  m*/
 * @return
 */
public Instances getModifiedInstances() {

    //Copy attribute list (and change categorical by numerical)
    final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>();
    for (int i = 0; i < instances.numAttributes(); i++) {
        Attribute attr = instances.attribute(i);
        if (attr.isNumeric() || attr.index() == instances.classIndex()) {
            lAttrs.add(attr);
        } else {
            Attribute newAttr = new Attribute(attr.name());
            lAttrs.add(newAttr);
        }
    }

    //Build new instance
    final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances());
    newInstances.setClassIndex(instances.classIndex());
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        final Instance cpyInstance = (Instance) instance.copy();
        for (int j = 0; j < instance.numAttributes(); j++) {
            Attribute attribute = instance.attribute(j);
            int k = 0;
            if (attribute.index() == instances.classIndex()) {
                //The class index is nominal
                cpyInstance.setValue(attribute, instance.stringValue(j));
            } else if (!attribute.isNumeric()) {
                String elt = attribute.value((int) instance.value(j));
                cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j)));
            } else {
                if (maxNum[k] > 1) {
                    cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]);
                }
                k++;
            }
        }
        newInstances.add(cpyInstance);
    }

    if (ignoreClass && instances.classIndex() != -1) {
        newInstances.deleteAttributeAt(instances.classIndex());
    }
    return newInstances;
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License:Open Source License

/**
 * Generate the centroid coordinates based 
 * on it's  members (objects assigned to the cluster of the centroid) and the distance 
 * function being used./*  w  ww .  ja  v  a  2 s .c  o m*/
 * @return the centroid
 */
public static MixedCentroid computeMixedCentroid(final boolean preserveOrder,
        final NormalizableDistance distanceFunction, final Instances numericInstances,
        final Instances originalInstances, final int clusterIndex) {
    final int numInstances = numericInstances.numInstances();
    final int numAttributes = numericInstances.numAttributes();

    final Map<TupleSI, Integer> addedAttr = new HashMap<TupleSI, Integer>();

    if (numInstances == 1) {
        Instance uniqueNumInstance = numericInstances.firstInstance();
        Instance uniqueMixInstance = originalInstances.firstInstance();
        double[] centroid = uniqueNumInstance.toDoubleArray();
        for (int i = 0; i < uniqueMixInstance.numAttributes(); i++) {
            if (!uniqueMixInstance.attribute(i).isNumeric()) {
                final String catVal = uniqueMixInstance.attribute(i).value((int) uniqueMixInstance.value(i));
                addedAttr.put(new TupleSI(catVal, i), 1);
            }
        }
        return new MixedCentroid(clusterIndex, centroid, addedAttr);
    }

    final double[] vals = new double[numAttributes];

    //used only for Manhattan Distance
    Instances sortedMembers = null;
    int middle = 0;
    boolean dataIsEven = false;

    final boolean isManhattanDist = (distanceFunction instanceof ManhattanDistance);
    final boolean isEuclideanDist = (distanceFunction instanceof EuclideanDistance);

    if (isManhattanDist) {
        middle = (numInstances - 1) / 2;
        dataIsEven = ((numInstances % 2) == 0);
        if (preserveOrder) {
            sortedMembers = numericInstances;
        } else {
            sortedMembers = new Instances(numericInstances);
        }
    }

    for (int j = 0; j < numAttributes; j++) {
        //in case of Euclidian distance the centroid is the mean point
        //in case of Manhattan distance the centroid is the median point
        //in both cases, if the attribute is nominal, the centroid is the mode            
        if (isEuclideanDist) {
            vals[j] = numericInstances.meanOrMode(j);

            for (int i = 0; i < numInstances; i++) {
                if (!originalInstances.attribute(j).isNumeric()) {
                    final Instance instance = originalInstances.instance(i);
                    final String catVal = instance.attribute(j).value((int) instance.value(j));
                    //Initialize map
                    final TupleSI key = new TupleSI(catVal, j);
                    if (!addedAttr.containsKey(key))
                        addedAttr.put(key, 0);
                    addedAttr.put(key, addedAttr.get(key) + 1);
                }
            }
        } else if (isManhattanDist) {
            sortedMembers.kthSmallestValue(j, middle + 1);
            vals[j] = sortedMembers.instance(middle).value(j);
            if (dataIsEven) {
                sortedMembers.kthSmallestValue(j, middle + 2);
                vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2;
            }
        } else {
            throw new IllegalStateException("Not handled distance ...");
        }
    }

    return new MixedCentroid(clusterIndex, vals, addedAttr);
}