List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
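A minimal, self-contained sketch of the basic pattern before the project examples below (assuming the Weka 3.7+ API, where DenseInstance is the concrete Instance implementation): define the attributes, create an empty Instances container, then append rows with add().

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) {
        // Define the schema: a single numeric attribute.
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(new Attribute("value"));

        // Create an empty dataset with initial capacity 0.
        Instances dataset = new Instances("example", attributes, 0);

        // Build a row, fill its values, and append it to the dataset.
        Instance row = new DenseInstance(dataset.numAttributes());
        row.setValue(dataset.attribute(0), 42.0);
        dataset.add(row);

        System.out.println(dataset);
    }
}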
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
@Override
protected Instances process(Instances instances) throws Exception {
    Instances result = new Instances(getOutputFormat(), 0);

    // Convert all instances w/o normalization
    ArrayList<Instance> converted = new ArrayList<Instance>();
    ArrayList<Double> docLengths = new ArrayList<Double>();
    if (!isFirstBatchDone()) {
        m_AvgDocLength = 0;
    }
    for (int i = 0; i < instances.size(); i++) {
        double docLength = convertInstancewoDocNorm(instances.instance(i), converted);

        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            if (!isFirstBatchDone()) {
                m_AvgDocLength += docLength;
            }
            docLengths.add(docLength);
        }
    }

    if (m_filterType != FILTER_NONE) {
        if (!isFirstBatchDone()) {
            m_AvgDocLength /= instances.size();
        }

        // Perform normalization if necessary.
        if (isFirstBatchDone() || (!isFirstBatchDone() && m_filterType == FILTER_NORMALIZE_ALL)) {
            for (int i = 0; i < converted.size(); i++) {
                normalizeInstance(converted.get(i), docLengths.get(i));
            }
        }
    }

    // Push all instances into the output queue
    for (int i = 0; i < converted.size(); i++) {
        result.add(converted.get(i));
    }

    return result;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the hierarchical
    // clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the hierarchical
    // clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.server.mongo.csn.MongoCluster.java
License:Apache License
/**
 * @param clusterBasedOn
 * @param graph_id
 * @return
 */
private Instances getInstances(String clusterBasedOn, String graph_id) {
    FastVector attributes = new FastVector();
    if (clusterBasedOn.equalsIgnoreCase("hoursP") || clusterBasedOn.equalsIgnoreCase("hoursQ")
            || clusterBasedOn.equalsIgnoreCase("hoursE")) {
        for (int i = 0; i < 24; i++) {
            attributes.addElement(new Attribute("att" + i));
        }
    } else {
        attributes.addElement(new Attribute("att0"));
    }
    Instances instances = new Instances("data", attributes, 0);

    // Get all nodes
    DBCursor nodes = DBConn.getConn().getCollection(MongoGraphs.COL_CSN_NODES)
            .find(new BasicDBObject("graph_id", graph_id));
    while (nodes.hasNext()) {
        double[] values = null;
        DBObject installationDBObj = nodes.next();
        nodeIDs.add(installationDBObj.get("_id").toString());

        // If the graph was built based on Person or Installation Type, do nothing
        if (clusterBasedOn.equalsIgnoreCase(MongoEdges.PersonType)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.InstallationType)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.TransformerID)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.TopologicalDistance)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.Location)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.SocialDistance)) {
            continue;
        } else {
            Object vS = installationDBObj.get(CSNTypes.getCsnTypes(clusterBasedOn));
            if (vS != null) {
                if (clusterBasedOn.equalsIgnoreCase("hoursP") || clusterBasedOn.equalsIgnoreCase("hoursQ")
                        || clusterBasedOn.equalsIgnoreCase("hoursE")) {
                    if (vS instanceof BasicDBList) {
                        BasicDBList v = (BasicDBList) vS;
                        values = new double[v.size()];
                        for (int i = 0; i < v.size(); i++) {
                            Object d = v.get(i);
                            if (d instanceof Double) {
                                values[i] = (Double) d;
                            }
                        }
                    }
                } else {
                    Double v = Double.parseDouble(vS.toString());
                    values = new double[1];
                    values[0] = v;
                }
            }
        }

        if (values != null) {
            Instance instance = new Instance(1, values);
            instances.add(instance);
        }
    }
    nodes.close();

    return instances;
}
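Note that this example uses the older Weka 3.6 API, where Instance is a concrete class (new Instance(weight, values)) and attribute lists are built with FastVector. In Weka 3.7 and later, Instance is an interface, DenseInstance is the usual concrete class, and FastVector is deprecated in favor of ArrayList. The convertToArff examples further down use the same older API.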
From source file:eu.cassandra.utils.Utils.java
License:Apache License
/**
 * This function is used in order to create clusters of points of interest
 * based on the active power difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @return The newly created clusters with the points that are comprising
 *         them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each point of interest is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }
    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create the AddCluster filter of Weka and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster data set
    newInst = Filter.useFilter(instances, addcluster);
    // System.out.println(newInst.toString());

    // Parse through the dataset to see where each point is placed in the
    // clusters.
    for (int i = 0; i < newInst.size(); i++) {
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sorting each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sorting all clusters by their active power.
    Collections.sort(result, Constants.comp5);

    return result;
}
From source file:examples.Pair.java
License:Open Source License
public static Pair<Instances, Instances> seprateTestAndTrainingSets(Instances instances, double probability) {
    Instances trainingSet = new Instances(instances, 0, 0);
    Instances testSet = new Instances(instances, 0, 0);
    Random rand = new Random();
    rand.setSeed(1L);

    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        if (rand.nextDouble() > probability) {
            testSet.add(instance);
        } else {
            trainingSet.add(instance);
        }
    }
    return new Pair<Instances, Instances>(trainingSet, testSet);
}
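This pattern works because Instances.add() shallow-copies the instance before appending it, so routing each source instance into one of the two sets does not leave the datasets sharing mutable state (note, however, that string and relational attribute values are not transferred by the copy).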
From source file:examples.TrainerFrame.java
private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    // This is a temporary fix to make it appear like it's finished
    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");

    // Generate target features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    // Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    // Load target ARFF file
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(3);
    pBar.repaint();

    // Load other ARFF file
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    // Merge the two datasets under a new nominal class attribute
    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector() {
    };
    typeList.add("target");
    typeList.add("other");

    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();

    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(6);
    pBar.repaint();

    // Train an unpruned J48 decision tree
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);

    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");
}
From source file:facebookpostpuller.PostModel.java
public static void convertToArff(File file) throws Exception {
    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();
        vals = new double[data.numAttributes()];

        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();

        Preprocess pre = new Preprocess();
        message = pre.emoticons(message);
        message = pre.emoji(message);
        message = pre.url(message);
        // StringFilter filter = new StringFilter(message);

        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45) { // Modified 6/11/2014
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));
        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}
From source file:facebookpostpuller.PostModelBACKUP.java
public static void convertToArff(File file) throws Exception {
    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();
        vals = new double[data.numAttributes()];

        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();
        // StringFilter filter = new StringFilter(message);

        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45 && age <= 54) {
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));
        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}