Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

On this page you can find example usages of weka.core Instances classIndex.

Prototype


public int classIndex() 

Source Link

Document

Returns the class attribute's index.

Usage

From source file:lu.lippmann.cdb.datasetview.tasks.UnsupervisedFeatureSelectionTask.java

License:Open Source License

/**
 * {@inheritDoc}/*w  w w. j  a  v a  2 s . c o m*/
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    final int k;
    if (this.ratio == -1)
        k = getFeaturesCountFromInput(null, dataSet.numAttributes());
    else
        k = (int) Math.round(this.ratio * dataSet.numAttributes());

    final List<Integer> attrToKeep = WekaMachineLearningUtil.computeUnsupervisedFeaturesSelection(dataSet, k);
    if (!attrToKeep.contains(dataSet.classIndex()))
        attrToKeep.add(dataSet.classIndex());
    final int[] array = ArraysUtil.transform(attrToKeep);

    System.out.println("unsupervised fs -> before=" + dataSet.numAttributes() + " after=" + array.length);

    final Instances newds = WekaDataProcessingUtil.buildFilteredByAttributesDataSet(dataSet, array);
    final Attribute clsAttr = newds.attribute(dataSet.classAttribute().name());
    System.out.println(clsAttr + " " + dataSet.classAttribute().name());
    newds.setClass(clsAttr);
    return newds;
}

From source file:lu.lippmann.cdb.datasetview.tasks.UnsupervisedMergeValuesOfFirstNominalTask.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>Picks the first nominal attribute of the data set (or the second one when the first is the
 * class attribute and another nominal attribute exists) and hands it to
 * {@code WekaDataProcessingUtil.buildDataSetWithUnsupervisedMergeNominalValues}.
 *
 * @param dataSet the data set to process
 * @return the data set built by the helper, or {@code dataSet} itself when it has no nominal
 *         attribute
 * @throws Exception if one of the delegated helpers fails
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    final int[] nominalIndexes = WekaDataStatsUtil.getNominalAttributesIndexes(dataSet);
    if (nominalIndexes.length == 0) {
        // Nothing to merge.
        return dataSet;
    }

    final boolean firstIsClass = nominalIndexes[0] == dataSet.classIndex();
    final int target = (firstIsClass && nominalIndexes.length > 1) ? nominalIndexes[1] : nominalIndexes[0];

    return WekaDataProcessingUtil.buildDataSetWithUnsupervisedMergeNominalValues(dataSet, target);
}

From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java

License:Open Source License

/**
 * Builds a non-zoomable scatter plot of the data set, drawn over {@code shapeImage} with hidden
 * axes and fixed axis bounds.
 *
 * <p>Without a class attribute every instance goes into a single series. Otherwise one series is
 * created per present class value (in sorted order), and the instances of each series are
 * remembered so that a (series, item) pair can be mapped back to its instance.
 *
 * <p>Item labels are shown only for instances whose attribute 3 equals {@code SELECTED_STATUS};
 * the label is the value of attribute 0 without its last four characters.
 *
 * @param dataSet    instances to plot
 * @param xidx       index of the attribute used as x coordinate
 * @param yidx       index of the attribute used as y coordinate
 * @param withLegend whether the chart displays a legend
 * @return the chart panel, with domain and range zooming disabled
 */
private ChartPanel buildMapPanel(final Instances dataSet, final int xidx, final int yidx,
        final boolean withLegend) {
    final XYSeriesCollection data = new XYSeriesCollection();
    // Series index -> instances of that series, in the order they were added to the series.
    // Only filled when the data set has a class attribute.
    final Map<Integer, java.util.List<Instance>> filteredInstances = new HashMap<Integer, java.util.List<Instance>>();
    final int classIndex = dataSet.classIndex();
    if (classIndex < 0) {
        final XYSeries series = new XYSeries("Serie", false);
        for (int i = 0; i < dataSet.numInstances(); i++) {
            series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx));
        }
        data.addSeries(series);
    } else {
        final Set<String> pvs = new TreeSet<String>(
                WekaDataStatsUtil.getPresentValuesForNominalAttribute(dataSet, classIndex));
        int p = 0;
        for (final String pv : pvs) {
            final XYSeries series = new XYSeries(pv, false);
            for (int i = 0; i < dataSet.numInstances(); i++) {
                if (dataSet.instance(i).stringValue(classIndex).equals(pv)) {
                    if (!filteredInstances.containsKey(p)) {
                        filteredInstances.put(p, new ArrayList<Instance>());
                    }
                    filteredInstances.get(p).add(dataSet.instance(i));

                    series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx));
                }
            }
            data.addSeries(series);

            p++;
        }

    }

    final JFreeChart chart = ChartFactory.createScatterPlot(null, // chart title
            dataSet.attribute(xidx).name(), // x axis label
            dataSet.attribute(yidx).name(), // y axis label
            data, // data
            PlotOrientation.VERTICAL, withLegend, // include legend
            true, // tooltips
            false // urls
    );

    final XYPlot xyPlot = (XYPlot) chart.getPlot();
    xyPlot.setBackgroundImage(shapeImage);

    final XYItemRenderer renderer = xyPlot.getRenderer();
    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (classIndex < 0) {
                return InstanceFormatter.htmlFormat(dataSet.instance(item), true);
            } else {
                return InstanceFormatter.htmlFormat(filteredInstances.get(series).get(item), true);
            }
        }
    };

    xyPlot.getRangeAxis().setVisible(false);
    xyPlot.getDomainAxis().setVisible(false);

    // NOTE(review): fixed bounds, presumably the coordinate extent of the background image - confirm.
    xyPlot.getRangeAxis().setLowerBound(60000);
    xyPlot.getRangeAxis().setUpperBound(135000);
    xyPlot.getDomainAxis().setLowerBound(45000);
    xyPlot.getDomainAxis().setUpperBound(110000);

    xyPlot.setDomainGridlinesVisible(false);
    xyPlot.setRangeGridlinesVisible(false);

    xyPlot.setBackgroundPaint(Color.white);

    final int nbSeries;
    if (classIndex < 0) {
        nbSeries = 1;
    } else {
        nbSeries = filteredInstances.size();
    }

    for (int i = 0; i < nbSeries; i++) {
        renderer.setSeriesToolTipGenerator(i, gen);
    }

    final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
        @Override
        public String generateLabel(final XYDataset ds, final int series, final int item) {
            // filteredInstances is empty when there is no class attribute; resolve the instance
            // the same way the tooltip generator does instead of dereferencing a missing entry.
            final Instance iii;
            if (classIndex < 0) {
                iii = dataSet.instance(item);
            } else {
                iii = filteredInstances.get(series).get(item);
            }
            if (iii.stringValue(3).equals(SELECTED_STATUS)) {
                final String label = iii.stringValue(0);
                return label.substring(0, label.length() - 4);
            } else {
                return null;
            }
        }
    };
    xyPlot.getRenderer().setBaseItemLabelGenerator(lg);
    xyPlot.getRenderer().setBaseItemLabelsVisible(true);
    xyPlot.getRenderer().setBaseItemLabelFont(new Font("Tahoma", Font.PLAIN, 12));

    xyPlot.getRenderer().setSeriesPaint(1, Color.BLUE);
    xyPlot.getRenderer().setSeriesPaint(0, new Color(210, 210, 210));
    xyPlot.getRenderer().setSeriesPaint(2, Color.DARK_GRAY);

    final ChartPanel cp = new ChartPanel(chart);
    cp.setDomainZoomable(false);
    cp.setRangeZoomable(false);

    return cp;
}

From source file:lu.lippmann.cdb.ext.hydviga.ui.HydroDatasetView.java

License:Open Source License

/**
 * Installs {@code pdataSet} as the current data set and refreshes the dependent UI parts.
 *
 * <p>If the data set has a class attribute that is not nominal, its class index is reset to
 * {@code -1} on the passed object itself. The first data set ever received is also kept as the
 * initial data set and used to initialise the completeness computation and progress bar.
 *
 * @param pdataSet the data set to display; may be modified as described above
 * @return this view, to allow call chaining
 */
public HydroDatasetView setDataSet(final Instances pdataSet) {
    final boolean hasClass = pdataSet.classIndex() != -1;
    if (hasClass && !pdataSet.classAttribute().isNominal()) {
        pdataSet.setClassIndex(-1);
    }

    final boolean firstDataSet = this.initialDataSet == null;
    if (firstDataSet) {
        this.initialDataSet = pdataSet;
        this.initialCompleteness = new CompletenessComputer(this.initialDataSet);
        final int cellCount = pdataSet.numInstances() * pdataSet.numAttributes();
        this.dataCompletenessProgressBar.setMaximum(cellCount);
        reinitDataCompleteness();
    }

    this.dataSet = pdataSet;

    if (!filtered) {
        this.notFilteredDataSet = pdataSet;
    }

    // Re-read the class index: it may have been reset above.
    final boolean showSupervisedPane = pdataSet.classIndex() != -1;
    this.supervisedTransformPane.setVisible(showSupervisedPane);

    for (final TabView tabView : tabViews) {
        tabView.update(dataSet);
    }

    try {
        updateFiltersPane(dataSet);
    } catch (Exception e) {
        eventPublisher.publish(new ErrorOccuredEvent("Error when updating filters", e));
    }

    updateTooltipShowingDatasetDimensions();

    return this;
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * /*from  w  w  w . j a  va 2 s  . co m*/
 * @param args
 * @throws Exception 
 */
public static void main(String[] args) {

    //String filePath = "./samples/csv/salary.csv";
    //String filePath = "./samples/csv/shih.csv";
    //String filePath = "./samples/csv/uci/zoo.csv";
    String filePath = "./samples/csv/bank.csv";
    //String filePath = "./samples/csv/uci/mushroom.csv";
    //String filePath = "./samples/csv/uci/house-votes-84.csv";
    //String filePath = "./samples/csv/uci/credit-g.csv";

    try {
        final Shih2010 shih = new Shih2010(filePath, false, true, 0.01);

        final Instances oldInstances = shih.getInstances();
        final int clsIndex = oldInstances.classIndex();

        System.out.println("Base = " + shih.getBase());
        System.out.println("NoBase =" + shih.getNoBase());

        int[][] M = shih.getM();
        showMatrix1(M);

        System.out.println("----------------");

        double[][] D = shih.getD();
        showMatrix2(D);

        System.out.println("----------------");

        Map<TupleSI, Double> F = shih.getF();
        System.out.println(F);

        System.out.println("----------------");

        //Launch k-means for testing
        final Instances newInstances = shih.getModifiedInstances();

        System.out.println(newInstances);

        //final List<String> className = WekaUtil.getClassesValues(oldInstances);
        /*
        //
        //2-step algorithm
        //
        int K = (int)(oldInstances.numInstances()/3.0);
        System.out.println("K="+K);
        List<IndexedInstance> subSets = WekaUtil2.doHAC(newInstances, K);
                
                
        final List<MixedCentroid> lCentroids = new ArrayList<MixedCentroid>();
        for(int i = 0 ; i < subSets.size() ;i++){
        //Build original subset from numeric subset to add additionnal info to centroids
        final IndexedInstance subSet = subSets.get(i);
        Map<Integer,Integer> mapIdx = subSet.getMapOrigIndex();
        Instances origSubset = new Instances(oldInstances,0);
        for(Integer rowIndex : mapIdx.keySet()){
           origSubset.add(oldInstances.instance(mapIdx.get(rowIndex)));
        }
        MixedCentroid centroid = WekaUtil2.computeMixedCentroid(false, new EuclideanDistance(subSet.getInstances()), subSet.getInstances(), origSubset,i);
        //System.out.println(centroid);
        lCentroids.add(centroid);
        }
                
                
                
        final SortedSet<TupleSI> domain = shih.getDomain();
        final int lCentroidsSize = lCentroids.size();
        final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>();
        if(lCentroidsSize > 0){
        for(int i = 0 ; i < newInstances.numAttributes() ; i++){
           lAttrs.add(newInstances.attribute(i));
        }
        for(TupleSI d : domain){
           lAttrs.add(new Attribute(d.getY()+"("+d.getX()+")"));
        }
        //Create centroid instances
        final Instances centroidInstances = new Instances("Centroid instance",lAttrs,lCentroidsSize);
        for(int i = 0 ; i < lCentroids.size() ; i++){
           double[] m11 = lCentroids.get(i).getMixedCentroid(domain);
           centroidInstances.add(new DenseInstance(1d,m11));
        }
                
        K = 5;
        SimpleKMeans mdf1 = new SimpleKMeans();
        mdf1.setOptions(Utils.splitOptions("-N " + K +" -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance"));
        mdf1.buildClusterer(centroidInstances);
        List<IndexedInstance> lInstances = WekaUtil2.computeClusters(mdf1,centroidInstances);
        Instances initialCentroid = new Instances(newInstances,0);
        for(IndexedInstance instances : lInstances){
           Instance centroid = WekaMachineLearningUtil.computeCentroid(false,new EuclideanDistance(instances.getInstances()),instances.getInstances());
           int nbCentroids = centroid.numAttributes();
           for(int j = nbCentroids -1 ; j >= newInstances.numAttributes(); j--){
              centroid.deleteAttributeAt(j);
           }
           initialCentroid.add(centroid);
        }
                
                
        //System.out.println(initialCentroid);
                
        ModifiedKMeans mdf = new ModifiedKMeans(initialCentroid);
        mdf.setOptions(Utils.splitOptions("-N " + K +" -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance"));
        mdf.setInitializeUsingKMeansPlusPlusMethod(true);
        mdf.buildClusterer(newInstances);
        final ClusterEvaluation eval=new ClusterEvaluation();
        eval.setClusterer(mdf);
        eval.evaluateClusterer(newInstances);
                
                
                
        List<IndexedInstance> lis = WekaUtil2.computeClusters(mdf, newInstances);
        List<Instances> result = new ArrayList<Instances>();
        lAttrs.clear();
        for(int i = 0 ; i < oldInstances.numAttributes() ; i++){
           lAttrs.add(oldInstances.attribute(i));
        }
        //lAttrs.add(oldInstances.attribute(clsIndex));
        Map<Integer,Map<String,Integer>> clusterRepartionClass = new HashMap<Integer, Map<String,Integer>>();
        List<String> lValues = new ArrayList<String>();
        for(int i = 0 ; i < K ; i++){
           lValues.add(String.valueOf(i));
        }
        int k=0;
        lAttrs.add(new Attribute("[cluster]",lValues));
        for(IndexedInstance idxInstances : lis){
           Instances instances = idxInstances.getInstances();
           Instances rInstances = new Instances("DTA instance",lAttrs,oldInstances.numInstances());
           rInstances.setClassIndex(instances.numAttributes());
           for(int i = 0 ; i < instances.numInstances() ; i++){
              int mappingIdx = idxInstances.getMapOrigIndex().get(i);
              double[] attValues = new double[instances.numAttributes()+2];
              DenseInstance instance = new DenseInstance(1d, attValues);
              int j = 0;
              for(j = 0 ; j < instances.numAttributes() ; j++){
          attValues[j] = oldInstances.instance(mappingIdx).value(j);
              }
              int clsValue = (int)oldInstances.instance(mappingIdx).value(clsIndex);
              j = instances.numAttributes();
              attValues[j]=clsValue;
              attValues[j+1]=k;
              rInstances.add(instance);
           }
           result.add(rInstances);
                
           //Build repartition class
           Map<String,Integer> repartionClass = new HashMap<String, Integer>();
           for(int i = 0 ; i < instances.numInstances() ; i++){
              String clsValue = oldInstances.attribute(oldInstances.classIndex()).value((int)oldInstances.instance(idxInstances.getMapOrigIndex().get(i)).value(oldInstances.classIndex()));
              if(!repartionClass.containsKey(clsValue)) repartionClass.put(clsValue,0);
              repartionClass.put(clsValue,repartionClass.get(clsValue)+1);
           }
           clusterRepartionClass.put(k, repartionClass);
           k++;
        }
                
        //Used result to discretize & apriori
        int ss = shih.getIdxsN().size();
        k = 0;
        Map<Integer,AssociationRules> rulePerCluster = new HashMap<Integer, AssociationRules>();
        for(Instances instances : result){
           int[] arrayToDiscretize = new int[ss+1];
           for(int i = 0 ; i <ss ; i++){
              arrayToDiscretize[i] = shih.getIdxsN().get(i);
           }
                
           Apriori ap = new Apriori();
           Discretize disc = new Discretize();
           disc.setOptions(Utils.splitOptions("-B 10"));
           disc.setAttributeIndicesArray(arrayToDiscretize);
           disc.setInputFormat(instances);
                
           //disc.set
           Instances discInstances = Filter.useFilter(instances, disc);
                
           //System.out.println(discInstances);
                
           ap.buildAssociations(discInstances);
           AssociationRules rules = ap.getAssociationRules();
           rulePerCluster.put(k,rules);
           k++;
        }
                
        double confidenceFactor = 0.5d;
        final int frameWidth=1250;//1024;
        final int frameHeight=950;//768;
        try 
        {
           final EventPublisher eventPublisher = new EventPublisherBushImpl();            
                
           Instances ai = WekaMachineLearningUtil.buildDataSetExplainingClustersAssignment(result,"cluster",true);            
           DecisionTree dt    = new C45DecisionTreeFactory(confidenceFactor).buildDecisionTree(ai);
           GraphWithOperations gwo =  dt.getGraphWithOperations();
           final GraphView myGraph = DecisionTreeToGraphViewHelper.buildGraphView(gwo,ai,eventPublisher,new CommandDispatcherFakeImpl());
                
           //Compute association rule string for showing
           StringBuilder resume = new StringBuilder("<html>\n");
           for(Integer clusterNum : rulePerCluster.keySet()){
              resume.append("<b><font color='red'>Cluster : ").append(clusterNum+1).append("</font></b><br/>");
              resume.append("Repartition : ").append(clusterRepartionClass.get(clusterNum)).append("<br/><br/>");   
              for(AssociationRule rule : rulePerCluster.get(clusterNum).getRules()){
          if(!rule.getPremise().toString().contains("cluster")
                &&!rule.getConsequence().toString().contains("cluster")
                &&!rule.getPremise().toString().contains("'All'")
                &&!rule.getConsequence().toString().contains("'All'")
                ){ //Do not put cluster rules & attribute with all same value
             resume.append(rule).append("<br/>");
          }
              }
              resume.append("<br/>");
           }
           resume.append("<br/><b><font color='red'>Full set rules :</font></b><br/>");
           int[] arrayToDiscretize = new int[ss+1];
           for(int i = 0 ; i <ss ; i++){arrayToDiscretize[i] = shih.getIdxsN().get(i);}
           Apriori ap = new Apriori();
           Discretize disc = new Discretize();
           disc.setOptions(Utils.splitOptions("-B 10"));
           disc.setAttributeIndicesArray(arrayToDiscretize);
           disc.setInputFormat(oldInstances);
           Instances discInstances = Filter.useFilter(oldInstances, disc);
           ap.buildAssociations(discInstances);
           AssociationRules rules = ap.getAssociationRules();
           for(AssociationRule rule : rules.getRules()){
              resume.append(rule).append("<br/>");
           }
           resume.append("</html>");
           //END Compute association rule string for showing
                
           final JXFrame f2=new JXFrame();
           LogoHelper.setLogo(f2);
           f2.setTitle("Rules for each cluster");
           f2.setLayout(new BorderLayout());
           f2.setPreferredSize(new Dimension(frameWidth,frameHeight));
           JScrollPane scrollPane = new JScrollPane(new JLabel(resume.toString()));
           f2.add(scrollPane);
           f2.pack();
           f2.setResizable(false);
           f2.setVisible(true);
                
                
           final JXFrame f3=new JXFrame();
           LogoHelper.setLogo(f3);
           f3.setTitle("Cluster assignement view");
           f3.setLayout(new BorderLayout());
           f3.setPreferredSize(new Dimension(frameWidth,frameHeight));
           f3.add(myGraph.asComponent());
           f3.pack();
           f3.setResizable(false);
           f3.setVisible(true);
                
                
           for(int i = 0 ; i < K ; i++){
              List<CNode> fs = gwo.findNodeByName("cluster"+(i+1));
              GraphWithOperations gwo2 = GraphUtil.filterGraphWithFinalState(gwo,fs.get(0));
              final GraphView myGraphFilter = DecisionTreeToGraphViewHelper.buildGraphView(gwo2,null,eventPublisher,new CommandDispatcherFakeImpl());
              final JXFrame f4=new JXFrame();
              LogoHelper.setLogo(f4);
              f4.setTitle("Cluster filtred view for cluster : " + i);
              f4.setLayout(new BorderLayout());
              f4.setPreferredSize(new Dimension(frameWidth,frameHeight));
              f4.add(myGraphFilter.asComponent());
              f4.pack();
              f4.setResizable(false);
              f4.setVisible(true);
           }
                
                
        } 
        catch (Exception e) 
        {
           e.printStackTrace();
        }
                
                
        }   
         */

    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java

License:Open Source License

/**
 * /*  w  w w  .j a  v a 2  s.co  m*/
 */
private static KmeansResult getSimplifiedInstances(final Instances instances, final DistanceFunction df,
        final int maxInstances) throws Exception {
    Instances centroids = null;
    List<Instances> clusters = null;

    final int savedClassIndex = instances.classIndex();
    instances.setClassIndex(-1);
    final SimpleKMeans clusterer = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(maxInstances, df);
    clusterer.buildClusterer(instances);
    clusters = WekaMachineLearningUtil.computeClusters(clusterer, instances).getClustersList();
    instances.setClassIndex(savedClassIndex);
    final int numClusters = clusters.size();
    //Set class index for each cluster instances
    //System.out.println("Setting class index to each cluster : " + savedClassIndex);
    for (int i = 0; i < numClusters; i++) {
        clusters.get(i).setClassIndex(savedClassIndex);
    }
    //Save centroids
    centroids = clusterer.getClusterCentroids();

    return new KmeansResult(centroids, clusters);
}

From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java

License:Open Source License

/**
 * /*from  w  w  w. j a va2 s .com*/
 */
public static CollapsedInstances distanceBetweenInstances(final Instances instances,
        final MDSDistancesEnum distEnum, final int maxInstances, final boolean ignoreClassInDistance)
        throws Exception {
    KmeansResult mapCentroids = null;

    final NormalizableDistance usedDist;
    if (distEnum.equals(MDSDistancesEnum.EUCLIDEAN)) {
        usedDist = new EuclideanDistance(instances);
        //usedDist.setDontNormalize(true);
        //usedDist.setAttributeIndices("1");
        //usedDist.setInvertSelection(true);
    } else if (distEnum.equals(MDSDistancesEnum.MANHATTAN))
        usedDist = new ManhattanDistance(instances);
    else if (distEnum.equals(MDSDistancesEnum.MINKOWSKI)) {
        usedDist = new MinkowskiDistance(instances);
        final String[] parameters = MDSDistancesEnum.MINKOWSKI.getParameters();
        //Change order
        double order = Double.valueOf(parameters[0]).doubleValue();
        ((MinkowskiDistance) usedDist).setOrder(order);
    } else if (distEnum.equals(MDSDistancesEnum.CHEBYSHEV))
        usedDist = new ChebyshevDistance(instances);
    //else if (distEnum.equals(MDSDistancesEnum.DT)) usedDist=new DTDistance(instances);
    else
        throw new IllegalStateException();

    final int numInstances = instances.numInstances();
    final boolean collapsed = (numInstances > maxInstances)
            && (distEnum.equals(MDSDistancesEnum.EUCLIDEAN) || distEnum.equals(MDSDistancesEnum.MANHATTAN));

    SimpleMatrix distances;

    //Ignore class in distance
    if (ignoreClassInDistance && instances.classIndex() != -1) {
        usedDist.setAttributeIndices("" + (instances.classIndex() + 1));
        usedDist.setInvertSelection(true);
    }

    int numCollapsedInstances = numInstances;
    if (collapsed) {
        //Compute distance with centroids using K-means with K=MAX_INSTANCES
        mapCentroids = getSimplifiedInstances(instances, usedDist, maxInstances);

        final List<Instance> centroids = mapCentroids.getCentroids();
        numCollapsedInstances = centroids.size();

        distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances);

        for (int i = 0; i < numCollapsedInstances; i++) {
            for (int j = i + 1; j < numCollapsedInstances; j++) {
                double dist = usedDist.distance(centroids.get(i), centroids.get(j));
                distances.set(i, j, dist);
                distances.set(j, i, dist);
            }
        }
    } else {
        distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances);
        for (int i = 0; i < numCollapsedInstances; i++) {
            for (int j = i + 1; j < numCollapsedInstances; j++) {
                double dist = usedDist.distance(instances.get(i), instances.get(j));
                distances.set(i, j, dist);
                distances.set(j, i, dist);
            }
        }
    }
    return new CollapsedInstances(instances, mapCentroids, distances, collapsed);
}

From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java

License:Open Source License

/**
 * Finds the most frequent class value inside the cluster attached to a centroid.
 *
 * @param centroidIndex index of the cluster in {@code mds.getCentroidMap().getClusters()}
 * @param mds           the collapsed instances holding the clustering result
 * @return the index of the class value with the highest nominal count (the first one on ties),
 *         or {@code -1} when the class attribute has no nominal counts
 */
private static Integer getStrongestClass(final Integer centroidIndex, final CollapsedInstances mds) {
    final Instances cluster = mds.getCentroidMap().getClusters().get(centroidIndex);
    final AttributeStats classStats = cluster.attributeStats(cluster.classIndex());

    int strongest = -1;
    int strongestCount = -1;
    int candidate = 0;
    while (candidate < classStats.nominalCounts.length) {
        final int count = classStats.nominalCounts[candidate];
        // Strict comparison: an equal count never replaces an earlier winner.
        if (count > strongestCount) {
            strongestCount = count;
            strongest = candidate;
        }
        candidate++;
    }

    return strongest;
}

From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java

License:Open Source License

/**
 * Rebuilds the series of {@code xyPlot} from an MDS result: one series per class value (or a
 * single dummy series when there is no usable class), with tooltips, legend shapes and optional
 * point labels.
 *
 * <p>When the MDS result is collapsed, each plotted point stands for a cluster: a custom
 * renderer is installed that sizes the point according to the cluster size and, if
 * {@code ENABLE_PIE_SHART} is set, draws the class repartition of the cluster as a pie.
 *
 * @param mdsResult                 the MDS result providing instances and 2D coordinates
 * @param xyPlot                    the plot to fill; its dataset is expected to be an
 *                                  {@code XYSeriesCollection}
 * @param attrNameToUseAsPointTitle optional; if given, the first name selects the attribute
 *                                  whose string value is used as item label
 * @throws Exception declared by the signature; presumably propagated from the helpers called
 *                   here - confirm
 */
private static void buildFilteredSeries(final MDSResult mdsResult, final XYPlot xyPlot,
        final String... attrNameToUseAsPointTitle) throws Exception {

    final CollapsedInstances distMdsRes = mdsResult.getCInstances();
    final Instances instances = distMdsRes.getInstances();

    final SimpleMatrix coordinates = mdsResult.getCoordinates();

    final Instances collapsedInstances = mdsResult.getCollapsedInstances();
    // Size of the largest cluster; used below to scale point sizes. Stays 0 when not collapsed.
    int maxSize = 0;
    if (distMdsRes.isCollapsed()) {
        final List<Instances> clusters = distMdsRes.getCentroidMap().getClusters();
        final int nbCentroids = clusters.size();
        maxSize = clusters.get(0).size();
        for (int i = 1; i < nbCentroids; i++) {
            final int currentSize = clusters.get(i).size();
            if (currentSize > maxSize) {
                maxSize = currentSize;
            }
        }
    }

    // Number of class values of the original instances, or 1 when no class is set.
    Attribute clsAttribute = null;
    int nbClass = 1;
    if (instances.classIndex() != -1) {
        clsAttribute = instances.classAttribute();
        nbClass = clsAttribute.numValues();
    }
    final XYSeriesCollection dataset = (XYSeriesCollection) xyPlot.getDataset();
    // Final copy so the anonymous renderer below can capture it.
    final int fMaxSize = maxSize;

    final List<XYSeries> lseries = new ArrayList<XYSeries>();

    //No class : add one dummy serie
    if (nbClass <= 1) {
        lseries.add(new XYSeries("Serie #1", false));
    } else {
        //Some class : add one serie per class
        for (int i = 0; i < nbClass; i++) {
            lseries.add(new XYSeries(clsAttribute.value(i), false));
        }
    }
    dataset.removeAllSeries();

    // Initialize filtered series: one empty Instances per series, filled in plotting order so
    // that (series, item) can be mapped back to an instance.
    final List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < lseries.size(); i++) {
        filteredInstances.add(new Instances(collapsedInstances, 0));
    }

    // (series index, item index within that series) -> index in collapsedInstances.
    // Only filled when the result is collapsed.
    final Map<Tuple<Integer, Integer>, Integer> correspondanceMap = new HashMap<Tuple<Integer, Integer>, Integer>();
    for (int i = 0; i < collapsedInstances.numInstances(); i++) {
        final Instance oInst = collapsedInstances.instance(i);
        // Series 0 by default; otherwise the dominant class of the cluster (collapsed) or the
        // instance's own class value.
        int indexOfSerie = 0;
        if (oInst.classIndex() != -1) {
            if (distMdsRes.isCollapsed()) {
                indexOfSerie = getStrongestClass(i, distMdsRes);
            } else {
                indexOfSerie = (int) oInst.value(oInst.classAttribute());
            }
        }
        lseries.get(indexOfSerie).add(coordinates.get(i, 0), coordinates.get(i, 1));

        filteredInstances.get(indexOfSerie).add(oInst);
        if (distMdsRes.isCollapsed()) {
            correspondanceMap.put(new Tuple<Integer, Integer>(indexOfSerie,
                    filteredInstances.get(indexOfSerie).numInstances() - 1), i);
        }
    }

    // Series paints handed to createPieChart; captured by the renderer here and filled near the
    // end of this method.
    final List<Paint> colors = new ArrayList<Paint>();

    for (final XYSeries series : lseries) {
        dataset.addSeries(series);
    }

    if (distMdsRes.isCollapsed()) {
        final XYLineAndShapeRenderer xyRenderer = new XYLineAndShapeRenderer(false, true) {
            private static final long serialVersionUID = -6019883886470934528L;

            @Override
            public void drawItem(Graphics2D g2, XYItemRendererState state, java.awt.geom.Rectangle2D dataArea,
                    PlotRenderingInfo info, XYPlot plot, ValueAxis domainAxis, ValueAxis rangeAxis,
                    XYDataset dataset, int series, int item, CrosshairState crosshairState, int pass) {

                // NOTE(review): this renderer is only installed when isCollapsed() is true, so
                // the else branch looks unreachable unless that flag can change - confirm.
                if (distMdsRes.isCollapsed()) {

                    final Integer centroidIndex = correspondanceMap
                            .get(new Tuple<Integer, Integer>(series, item));
                    final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);
                    int size = cluster.size();

                    // Point size grows with the cluster size relative to the largest cluster.
                    final int shapeSize = (int) (MAX_POINT_SIZE * size / fMaxSize + 1);

                    final double x1 = plot.getDataset().getX(series, item).doubleValue();
                    final double y1 = plot.getDataset().getY(series, item).doubleValue();

                    // Class repartition of the cluster, or a single "No class" entry.
                    Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                    mapRepartition.put("No class", size);
                    if (cluster.classIndex() != -1) {
                        mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                    }

                    // Screen (Java2D) coordinates of the point, used to place the pie.
                    final RectangleEdge xAxisLocation = plot.getDomainAxisEdge();
                    final RectangleEdge yAxisLocation = plot.getRangeAxisEdge();
                    final double fx = domainAxis.valueToJava2D(x1, dataArea, xAxisLocation);
                    final double fy = rangeAxis.valueToJava2D(y1, dataArea, yAxisLocation);

                    // The series shape is replaced before every item is drawn so each item gets
                    // its own size.
                    setSeriesShape(series,
                            new Ellipse2D.Double(-shapeSize / 2, -shapeSize / 2, shapeSize, shapeSize));

                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series,
                            item, crosshairState, pass);

                    //Draw pie
                    if (ENABLE_PIE_SHART) {
                        createPieChart(g2, (int) (fx - shapeSize / 2), (int) (fy - shapeSize / 2), shapeSize,
                                mapRepartition, size, colors);
                    }

                } else {

                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series,
                            item, crosshairState, pass);

                }

            }

        };

        xyPlot.setRenderer(xyRenderer);
    }

    // Tooltip: centroid plus non-empty class counts of its cluster when collapsed, otherwise a
    // short description of the instance itself.
    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (distMdsRes.isCollapsed()) {
                final StringBuilder res = new StringBuilder("<html>");
                final Integer centroidIndex = correspondanceMap.get(new Tuple<Integer, Integer>(series, item));
                final Instance centroid = distMdsRes.getCentroidMap().getCentroids().get(centroidIndex);
                final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);

                //Set same class index for cluster than for original instances
                // (note: this modifies the cluster object each time a tooltip is generated)
                //System.out.println("Cluster index = "  + cluster.classIndex() + "/" + instances.classIndex());
                cluster.setClassIndex(instances.classIndex());

                Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                mapRepartition.put("No class", cluster.size());
                if (cluster.classIndex() != -1) {
                    mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                }
                res.append(InstanceFormatter.htmlFormat(centroid, false)).append("<br/>");
                for (final Map.Entry<Object, Integer> entry : mapRepartition.entrySet()) {
                    if (entry.getValue() != 0) {
                        res.append("Class :<b>'" + StringEscapeUtils.escapeHtml(entry.getKey().toString())
                                + "</b>' -> " + entry.getValue()).append("<br/>");
                    }
                }
                res.append("</html>");
                return res.toString();
            } else {
                //return InstanceFormatter.htmlFormat(filteredInstances.get(series).instance(item),true);
                return InstanceFormatter.shortHtmlFormat(filteredInstances.get(series).instance(item));
            }
        }
    };

    final Shape shape = new Ellipse2D.Float(0f, 0f, MAX_POINT_SIZE, MAX_POINT_SIZE);

    // NOTE(review): when not collapsed the plot keeps the renderer it already had, so these casts
    // assume that renderer is an XYLineAndShapeRenderer - confirm against the caller.
    ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setUseOutlinePaint(true);

    for (int p = 0; p < nbClass; p++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(p, gen);
        ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setLegendShape(p, shape);
        xyPlot.getRenderer().setSeriesOutlinePaint(p, Color.BLACK);
    }

    // Record the paint of each series; this is the list captured by the renderer above.
    for (int ii = 0; ii < nbClass; ii++) {
        colors.add(xyPlot.getRenderer().getItemPaint(ii, 0));
    }

    // Optional point labels: only the first requested attribute name is considered, and only if
    // the original instances have an attribute with that name.
    if (attrNameToUseAsPointTitle.length > 0) {
        final Attribute attrToUseAsPointTitle = instances.attribute(attrNameToUseAsPointTitle[0]);
        if (attrToUseAsPointTitle != null) {
            final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
                @Override
                public String generateLabel(final XYDataset dataset, final int series, final int item) {
                    return filteredInstances.get(series).instance(item).stringValue(attrToUseAsPointTitle);
                }
            };
            xyPlot.getRenderer().setBaseItemLabelGenerator(lg);
            xyPlot.getRenderer().setBaseItemLabelsVisible(true);
        }
    }
}

From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java

License:Open Source License

/**
 * Builds a scatter plot of the given clusters and displays it in a new frame.
 * <p>
 * Every instance is plotted using the values of its attributes at index 0 (X) and
 * index 1 (Y). The instances of the cluster at position {@code i} of the list are
 * added to the series at index {@code i}.
 * <p>
 * If the first cluster has a class attribute with more than one value, the first
 * series are named after these values; otherwise a single series named
 * {@code "Serie #1"} is created. When there are more clusters than such series,
 * extra series named {@code "Cluster #n"} are appended so that every cluster has
 * a series to be drawn into. An empty list produces an empty chart.
 *
 * @param clusters the clusters to plot, one {@link Instances} per cluster
 */
public static void buildKMeansChart(final List<Instances> clusters) {
    final XYSeriesCollection dataset = new XYSeriesCollection();

    final JFreeChart chart = ChartFactory.createScatterPlot("", // title 
            "X", "Y", // axis labels 
            dataset, // dataset 
            PlotOrientation.VERTICAL, true, // legend? yes 
            true, // tooltips? yes 
            false // URLs? no 
    );

    final XYPlot xyPlot = (XYPlot) chart.getPlot();

    ((NumberAxis) xyPlot.getDomainAxis()).setTickUnit(new NumberTickUnit(2.0));
    ((NumberAxis) xyPlot.getRangeAxis()).setTickUnit(new NumberTickUnit(2.0));

    // The class attribute (if any) of the first cluster drives the initial series names.
    Attribute clsAttribute = null;
    int nbClass = 1;
    if (!clusters.isEmpty() && clusters.get(0).classIndex() != -1) {
        clsAttribute = clusters.get(0).classAttribute();
        nbClass = clsAttribute.numValues();
    }
    if (nbClass <= 1) {
        dataset.addSeries(new XYSeries("Serie #1", false));
    } else {
        for (int i = 0; i < nbClass; i++) {
            dataset.addSeries(new XYSeries(clsAttribute.value(i), false));
        }
    }

    // Points are added per cluster index below, so make sure there is at least one
    // series per cluster; otherwise dataset.getSeries(i) fails for the extra clusters.
    final int nbClusters = clusters.size();
    for (int i = dataset.getSeriesCount(); i < nbClusters; i++) {
        dataset.addSeries(new XYSeries("Cluster #" + (i + 1), false));
    }

    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            return "TODO";
        }
    };

    // Register the tooltip generator on every series actually present in the dataset.
    final int nbSeries = dataset.getSeriesCount();
    for (int i = 0; i < nbSeries; i++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(i, gen);
    }

    for (int i = 0; i < nbClusters; i++) {
        final Instances instances = clusters.get(i);
        final XYSeries series = dataset.getSeries(i);
        final int nbInstances = instances.numInstances();
        for (int j = 0; j < nbInstances; j++) {
            final Instance oInst = instances.instance(j);
            series.add(oInst.value(0), oInst.value(1));
        }
    }

    final TitledBorder titleBorder = new TitledBorder("Kmeans of projection");
    final ChartPanel chartPanel = new ChartPanel(chart);
    chartPanel.setMouseWheelEnabled(true);
    chartPanel.setPreferredSize(new Dimension(1200, 900));
    chartPanel.setBorder(titleBorder);
    chartPanel.setBackground(Color.WHITE);

    final JXFrame frame = new JXFrame();
    frame.getContentPane().add(chartPanel);
    // Size the frame to its content before showing it, so it does not appear unpacked first.
    frame.pack();
    frame.setVisible(true);

}