List of usage examples for weka.clusterers HierarchicalClusterer setPrintNewick
public void setPrintNewick(boolean bPrintNewick)
From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java
License:Apache License
/** * This is an auxiliary function that prepares the clustering data set. The * events must be translated to instances of the data set that can be used for * clustering.// ww w .java2 s .c om * * @param isolated * The list of the events containing an isolated appliance. * @return The instances of the data * @throws Exception */ private Instances createInstances(ArrayList<Event> isolated) throws Exception { // Initializing auxiliary variables namely the attributes of the data set Attribute id = new Attribute("id"); Attribute pDiffRise = new Attribute("pDiffRise"); Attribute qDiffRise = new Attribute("qDiffRise"); Attribute pDiffReduce = new Attribute("pDiffReduce"); Attribute qDiffReduce = new Attribute("qDiffReduce"); ArrayList<Attribute> attr = new ArrayList<Attribute>(); attr.add(id); attr.add(pDiffRise); attr.add(qDiffRise); attr.add(pDiffReduce); attr.add(qDiffReduce); Instances instances = new Instances("Isolated", attr, 0); // Each event is translated to an instance with the above attributes for (Event event : isolated) { Instance inst = new DenseInstance(5); inst.setValue(id, event.getId()); inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff()); inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff()); inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff()); inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff()); instances.add(inst); } int n = Constants.MAX_CLUSTERS_NUMBER; Instances newInst = null; System.out.println("Instances: " + instances.toSummaryString()); System.out.println("Max Clusters: " + n); // Create the addcluster filter of Weka and the set up the hierarchical // clusterer. AddCluster addcluster = new AddCluster(); if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) { HierarchicalClusterer clusterer = new HierarchicalClusterer(); String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" }; clusterer.setDistanceFunction(new EuclideanDistance()); clusterer.setNumClusters(n); clusterer.setOptions(opt); clusterer.setPrintNewick(true); clusterer.setDebug(true); // clusterer.getOptions(); addcluster.setClusterer(clusterer); addcluster.setInputFormat(instances); addcluster.setIgnoredAttributeIndices("1"); // Cluster data set newInst = Filter.useFilter(instances, addcluster); } else { SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); // This is the important parameter to set kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(n); kmeans.buildClusterer(instances); addcluster.setClusterer(kmeans); addcluster.setInputFormat(instances); addcluster.setIgnoredAttributeIndices("1"); // Cluster data set newInst = Filter.useFilter(instances, addcluster); } return newInst; }
From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java
License:Apache License
/** * This is an auxiliary function that prepares the clustering data set. The * events must be translated to instances of the data set that can be used for * clustering./*from w w w .ja v a 2 s .c om*/ * * @param isolated * The list of the events containing an isolated appliance. * @return The instances of the data * @throws Exception */ private Instances createInstances(ArrayList<Event> isolated) throws Exception { // Initializing auxiliary variables namely the attributes of the data set Attribute id = new Attribute("id"); Attribute pDiffRise = new Attribute("pDiffRise"); Attribute qDiffRise = new Attribute("qDiffRise"); Attribute pDiffReduce = new Attribute("pDiffReduce"); Attribute qDiffReduce = new Attribute("qDiffReduce"); Attribute duration = new Attribute("duration"); ArrayList<Attribute> attr = new ArrayList<Attribute>(); attr.add(id); attr.add(pDiffRise); attr.add(qDiffRise); attr.add(pDiffReduce); attr.add(qDiffReduce); attr.add(duration); Instances instances = new Instances("Isolated", attr, 0); // Each event is translated to an instance with the above attributes for (Event event : isolated) { Instance inst = new DenseInstance(6); inst.setValue(id, event.getId()); inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff()); inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff()); inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff()); inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff()); inst.setValue(duration, event.getEndMinute() - event.getStartMinute()); instances.add(inst); } int n = Constants.MAX_CLUSTERS_NUMBER; Instances newInst = null; log.info("Instances: " + instances.toSummaryString()); log.info("Max Clusters: " + n); // Create the addcluster filter of Weka and the set up the hierarchical // clusterer. AddCluster addcluster = new AddCluster(); if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) { HierarchicalClusterer clusterer = new HierarchicalClusterer(); String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" }; clusterer.setDistanceFunction(new EuclideanDistance()); clusterer.setNumClusters(n); clusterer.setOptions(opt); clusterer.setPrintNewick(true); clusterer.setDebug(true); // clusterer.getOptions(); addcluster.setClusterer(clusterer); addcluster.setInputFormat(instances); addcluster.setIgnoredAttributeIndices("1"); // Cluster data set newInst = Filter.useFilter(instances, addcluster); } else { SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); // This is the important parameter to set kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(n); kmeans.buildClusterer(instances); addcluster.setClusterer(kmeans); addcluster.setInputFormat(instances); addcluster.setIgnoredAttributeIndices("1"); // Cluster data set newInst = Filter.useFilter(instances, addcluster); } return newInst; }
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.hierarchical.HierarClusterer.java
License:Open Source License
@Override public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) { HierarchicalClusterer clusterer = new HierarchicalClusterer(); String[] options = new String[5]; LinkType link = parameters.getParameter(HierarClustererParameters.linkType).getValue(); DistanceType distanceType = parameters.getParameter(HierarClustererParameters.distanceType).getValue(); options[0] = "-L"; options[1] = link.name();//w w w. jav a 2s . c om options[2] = "-A"; switch (distanceType) { case EUCLIDIAN: options[3] = "weka.core.EuclideanDistance"; break; case CHEBYSHEV: options[3] = "weka.core.ChebyshevDistance"; break; case MANHATTAN: options[3] = "weka.core.ManhattanDistance"; break; case MINKOWSKI: options[3] = "weka.core.MinkowskiDistance"; break; } options[4] = "-P"; try { clusterer.setOptions(options); clusterer.setPrintNewick(true); clusterer.buildClusterer(dataset); // clusterer.graph() gives only the first cluster and in the case // there // are more than one cluster the variables in the second cluster are // missing. // I'm using clusterer.toString() which contains all the clusters in // Newick format. ClusteringResult result = new ClusteringResult(null, clusterer.toString(), clusterer.getNumClusters(), null); return result; } catch (Exception ex) { logger.log(Level.SEVERE, null, ex); return null; } }
From source file:nl.uva.sne.classifiers.Hierarchical.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {//from ww w . j a v a 2 s . c om Instances data = ClusterUtils.terms2Instances(inDir, false); // ArffSaver s = new ArffSaver(); // s.setInstances(data); // s.setFile(new File(inDir+"/dataset.arff")); // s.writeBatch(); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Minkowski": df = new MinkowskiDistance(data); break; case "Euclidean": df = new EuclideanDistance(data); break; case "Chebyshev": df = new ChebyshevDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } Logger.getLogger(Hierarchical.class.getName()).log(Level.INFO, "Start clusteing"); weka.clusterers.HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); clusterer.setDebug(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); clusterer.setDistanceIsBranchLength(true); clusterer.setPrintNewick(false); weka.clusterers.FilteredClusterer fc = new weka.clusterers.FilteredClusterer(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "1"; // we want to ignore the attribute that is in the position '1' Remove remove = new Remove(); // new instance of filter remove.setOptions(options); // set options fc.setFilter(remove); //add filter to remove attributes fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer fc.buildClusterer(data); // // Print normal // clusterer.setPrintNewick(false); // System.out.println(clusterer.graph()); // // Print Newick // clusterer.setPrintNewick(true); // System.out.println(clusterer.graph()); // // // Let's try to show this clustered data! // JFrame mainFrame = new JFrame("Weka Test"); // mainFrame.setSize(600, 400); // mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); // Container content = mainFrame.getContentPane(); // content.setLayout(new GridLayout(1, 1)); // // HierarchyVisualizer visualizer = new HierarchyVisualizer(clusterer.graph()); // content.add(visualizer); // // mainFrame.setVisible(true); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Hierarchical.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:swm.project.mappings.UserToUserCluster.java
private void clusterWithDBSCAN() throws FileNotFoundException, IOException, Exception { Reader reader;//w ww.j a va2 s . co m reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS); Instances instanceValues = new Instances(reader); HierarchicalClusterer h = new HierarchicalClusterer(); h.setNumClusters(20); h.setPrintNewick(true); h.buildClusterer(instanceValues); }