Example usage for weka.clusterers SimpleKMeans setDistanceFunction

List of usage examples for weka.clusterers SimpleKMeans setDistanceFunction

Introduction

On this page you can find example usage of weka.clusterers SimpleKMeans setDistanceFunction.

Prototype

public void setDistanceFunction(DistanceFunction df) throws Exception 

Document

Sets the distance function to use for instance comparison.
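
A minimal, self-contained sketch of the call (the file name "data.arff" and the cluster count are illustrative assumptions, not taken from the examples below):

import weka.clusterers.SimpleKMeans;
import weka.core.EuclideanDistance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetDistanceFunctionSketch {
    public static void main(String[] args) throws Exception {
        // load any numeric dataset; "data.arff" is a placeholder path
        Instances data = new DataSource("data.arff").getDataSet();

        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        // set the distance function before building the clusterer
        kMeans.setDistanceFunction(new EuclideanDistance());
        kMeans.buildClusterer(data);

        System.out.println(kMeans);
    }
}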

Usage

From source file:ClusteringClass.java

public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";

    try {
        FileWriter fw = new FileWriter(filename);
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani",
                "dani");

        String query = "SELECT * FROM SONG_RATING2";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        for (int i = 1; i < 23; i++) {
            if (i != 2) {
                ResultSetMetaData rsmd = rs.getMetaData();
                String name = rsmd.getColumnName(i);
                fw.append(name);
                if (i != 22) {
                    fw.append(',');
                } else {
                    fw.append('\n');
                }
            }
        }

        String query1 = "SELECT * FROM SONG_DATA";
        Statement stmt1 = conn.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);

        String[] titles = new String[150];

        for (int ii = 0; ii < 150; ii++) {
            rs1.next();
            titles[ii] = rs1.getString("TITLE");
        }

        while (rs.next()) {
            for (int i = 1; i < 23; i++) {
                if (i != 2) {
                    fw.append(rs.getString(i));
                    if (i != 22) {
                        fw.append(',');
                    } else {
                        fw.append('\n');
                    }
                }
            }
        }

        fw.flush();
        fw.close();
        conn.close();
        System.out.println("CSV File is created successfully.");

        /*
         Clustering part
         */
        DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv");
        Instances train = source.getDataSet();

        /*
         Apply the Remove filter provided by Weka so that one attribute is
         not considered by the clustering algorithm.
         */
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        /*
         Apply the Normalize filter provided by Weka to normalize our
         dataset.
         */
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /*
         First Clustering Algorithm
         */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /*
         First Evaluation
         */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }

        for (int w = 0; w < 10; w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (dati[i][2].equals(String.valueOf(w))) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati[i][j] + "-> \t");
                        } else {
                            System.out.println(dati[i][j]);
                        }
                    }
                }
            }
        }

        /*first graph  
                
         PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1);
         //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
         String name = "";
         String cname = clus1.getClass().getName();
         if (cname.startsWith("weka.clusterers."))
         name += cname.substring("weka.clusterers.".length());
         else
         name += cname;
                
                
         VisualizePanel vp = new VisualizePanel();
         vp.setName(name + " (" + train.relationName() + ")");
         predData.setPlotName(name + " (" + train.relationName() + ")");
         vp.addPlot(predData);
                
         String plotName = vp.getName();
         final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName);
         jf.setSize(500,400);
         jf.getContentPane().setLayout(new BorderLayout());
         jf.getContentPane().add(vp, BorderLayout.CENTER);
         jf.dispose();
         jf.addWindowListener(new java.awt.event.WindowAdapter() {
         public void windowClosing(java.awt.event.WindowEvent e) {
         jf.dispose();
         }
         });
         jf.setVisible(true);
                
         end first graph
         */

        /*
         Second Clustering Algorithm
         */

        System.out.println();

        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /*
         Second Evaluation
         */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }

        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (Double.parseDouble(dati3[i][2]) == w) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati3[i][j] + "-> \t");
                        } else {
                            System.out.println(dati3[i][j]);
                        }
                    }
                }
            }
        }
        System.out.println();
        for (int i = 0; i < 150; i++) {
            if (Double.parseDouble(dati3[i][2]) == -1.0) {
                for (int j = 0; j < 4; j++) {
                    if (j != 3) {
                        System.out.print(dati3[i][j] + "-> \t");
                    } else {
                        System.out.println(dati3[i][j]);
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:entities.ArffFile.java

/**
 * Given a list of parameters, runs the microaggregation filter.
 * All of these parameters are user input.
 * @param df can be the Euclidean or Manhattan distance, as specified in the input
 * @param numCluster
 * @param seed
 * @param maxIterations
 * @param replaceMissingValues
 * @param preserveInstancesOrder
 * @param attributes list of the attributes to be generalized via clustering
 */
public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations,
        boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes)
        throws Exception {
    //instancesFilter = new Instances(instances);
    SimpleKMeans kMeans;
    kMeans = new SimpleKMeans();
    Instances uniqueAttributes;
    uniqueAttributes = new Instances(instancesFilter);
    List<String> names = new ArrayList<>();
    int i = 0;
    for (Integer attribute : attributes) {
        String name = new String(instancesFilter.attribute(attribute).name());
        if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString())
            throw new Exception("Cannot cluster with attributes of type DATE or STRING");
        names.add(name);
    }
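    // keep only the selected attributes: delete every attribute whose name is not in the list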
    while (uniqueAttributes.numAttributes() != attributes.size()) {
        if (!names.contains(uniqueAttributes.attribute(i).name()))
            uniqueAttributes.deleteAttributeAt(i);
        else
            i++;
    }
    try {
        kMeans.setNumClusters(numCluster);
        kMeans.setMaxIterations(maxIterations);
        kMeans.setSeed(seed);
        kMeans.setDisplayStdDevs(false);
        kMeans.setDistanceFunction(df);
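        // note: Weka's flag is "don't replace missing values", so passing true disables replacement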
        kMeans.setDontReplaceMissingValues(replaceMissingValues);
        kMeans.setPreserveInstancesOrder(preserveInstancesOrder);
        kMeans.buildClusterer(uniqueAttributes);
        //System.out.println(kMeans);
        for (int j = 0; j < uniqueAttributes.numInstances(); j++) {
            int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j));
            for (int k = 0; k < uniqueAttributes.numAttributes(); k++) {
                if (uniqueAttributes.attribute(k).isNumeric())
                    uniqueAttributes.instance(j).setValue(k,
                            Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k)));
                else
                    uniqueAttributes.instance(j).setValue(k,
                            kMeans.getClusterCentroids().instance(cluster).toString(k));
            }
        }
        replaceValues(uniqueAttributes, attributes);
    } catch (Exception ex) {
        Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex);
    }
    //saveToFile("4");
}

From source file:graph.clustering.NodeClusterer.java

License:Apache License

private int[] performClustering(Instances clusterTrainingSet, int numOfClusters) {
    String[] options = new String[7];
    options[0] = "-N"; // num of clusters
    options[1] = String.valueOf(numOfClusters);
    options[2] = "-I"; // max num of iterations
    options[3] = "500";
    options[4] = "-S"; // the random seed number
    options[5] = "10";
    options[6] = "-O"; // preserve instance order

    String[] distanceOptions = new String[2];
    distanceOptions[0] = "-R"; // attribute indices
    distanceOptions[1] = "first-last";

    EuclideanDistance distanceFunc = new EuclideanDistance();
    SimpleKMeans clusterer = new SimpleKMeans();
    int[] assignments = null;
    try {
        distanceFunc.setOptions(distanceOptions);

        clusterer.setOptions(options);
        clusterer.setDistanceFunction(distanceFunc);
        clusterer.buildClusterer(clusterTrainingSet);

        assignments = clusterer.getAssignments();
    } catch (Exception e1) {
        System.out.println("Error in clustering:");
        e1.printStackTrace();
    }

    return assignments;
}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}
 * 
 * <p>
 * This method is specialized for <b>kmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Returns a valid instances set, generated based on the absolute
        // behavior models
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka
        SimpleKMeans kmeans = new SimpleKMeans();

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // distance function; the "don't normalize" (-D) option is left commented out below
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // get number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // clustering
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // the number of clusters is fixed here, so the loop runs exactly once
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments,
            // clusterSize);

        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionException should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:nl.uva.sne.classifiers.Kmeans.java

@Override
public Map<String, String> cluster(String inDir) throws IOException, ParseException {
    try {

        Instances data = ClusterUtils.terms2Instances(inDir, false);

        DistanceFunction df;
        // SimpleKMeans currently only supports the Euclidean and Manhattan distances.
        switch (distanceFunction) {
        case "Euclidean":
            df = new EuclideanDistance(data);
            break;
        case "Manhattan":
            df = new ManhattanDistance(data);
            break;
        default:
            df = new EuclideanDistance(data);
            break;
        }

        SimpleKMeans clusterer = new SimpleKMeans();

        Random rand = new Random(System.currentTimeMillis());
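        // draw a random seed in the range [1000000, Integer.MAX_VALUE]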
        int seed = rand.nextInt((Integer.MAX_VALUE - 1000000) + 1) + 1000000;
        clusterer.setSeed(seed);
        clusterer.setMaxIterations(1000000000);
        Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clustering");
        clusterer.setPreserveInstancesOrder(true);

        clusterer.setNumClusters(numOfClusters);
        clusterer.setDistanceFunction(df);

        return ClusterUtils.bulidClusters(clusterer, data, inDir);

    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java

License:Open Source License

public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setPreserveInstancesOrder(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.PERSERVE_INSTANCE)));
    kmeans.setDontReplaceMissingValues(Boolean
            .valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DONT_REPLACE_MISSING_VALUES)));
    kmeans.setDisplayStdDevs(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DISPLAY_STD_DEVS)));
    kmeans.setMaxIterations(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.MAX_ITERATIONS)));
    kmeans.setNumClusters(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.NUM_CLUSTERS)));
    kmeans.setSeed(10);
    //kmeans.setSeed(
    //      Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.SEED)));
    kmeans.setDistanceFunction(euclideanDistance);
    kmeans.buildClusterer(inputInstances);

    WekaUtils.saveModel(kmeans, outPath);

    /*
    *
    * To obtain the cluster percentages:
    * ClusterEvaluation eval = new ClusterEvaluation();
    * eval.setClusterer(kmeans);
    * eval.evaluateClusterer(inputInstances);
    * System.out.println(eval.clusterResultsToString());
    *
    * */

    return kmeans;
}

From source file:swm.project.mappings.UserToUserCluster.java

private void clusterUserHistoryWithKmeans() throws FileNotFoundException, IOException, Exception {
    Reader reader;
    userToUserClusterHistory = new HashMap<>();
    userClustersToUsersHistory = new HashMap<>();
    reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS);
    Instances instanceValues = new Instances(reader);
    SimpleKMeans kmeans = new SimpleKMeans();

    kmeans.setNumClusters(20);
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setDistanceFunction(new EuclideanDistance());
    kmeans.buildClusterer(instanceValues);

    int[] assignments = kmeans.getAssignments();
    int userid = 0;
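    // with preserveInstancesOrder(true), assignments[i] is the cluster of the i-th instance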
    for (int clusterNo : assignments) {
        int user = (int) instanceValues.get(userid).value(0);
        userToUserClusterHistory.put(user, clusterNo);
        ArrayList<Integer> users = new ArrayList<>();
        if (userClustersToUsersHistory.containsKey(clusterNo)) {
            users = userClustersToUsersHistory.get(clusterNo);
            users.add(user);
        } else {
            users.add(user);
            userClustersToUsersHistory.put(clusterNo, users);
        }
        userid++;

    }
}