List of usage examples for weka.core Instance setValue
public void setValue(Attribute att, String value);
// related overloads used in the examples below:
public void setValue(Attribute att, double value);
public void setValue(int attIndex, double value);
public void setValue(int attIndex, String value);
From source file:gr.iit.demokritos.cru.cps.ai.KeyphraseClustering.java
License:Open Source License
public KeyphraseClustering(ArrayList<String> k, int numberOfClust, String language, WNAccess wn, WNDE wnde, WNEL wnel)
        throws ClassNotFoundException {
    this.language = language;
    this.wn = wn;
    this.wd = new WordNetENDistance();
    this.wdde = new WordNetDeDistance(wnde);
    this.wdel = new WordNetElDistance(wnel);
    this.keys = new ArrayList<String>();
    for (int i = 0; i < k.size(); i++) {
        String[] tokenLine = k.get(i).split(";");
        String key = tokenLine[0];
        this.keys.add(key);
    }

    Attribute words = new Attribute("words", (FastVector) null);
    FastVector fvWekaAttributes = new FastVector();
    fvWekaAttributes.addElement(words);
    this.data = new Instances("words", fvWekaAttributes, 0);

    double sum = 0.0;
    for (String s : this.keys) {
        // keep the sum of the semantic distance between all the words
        for (String p : this.keys) {
            if (!p.equalsIgnoreCase(s)) {
                sum += getDistance(s, p);
            }
        }
        // create new instance for every key and add it to the data
        Instance inst = new Instance(1);
        inst.setValue(this.data.attribute(0), s);
        this.data.add(inst);
    }

    if (numberOfClust == 0) {
        int numerator = (int) ceil(sum);
        int clust = (int) ceil(numerator / (double) (this.keys.size()));
        this.clusters = clust;
    } else {
        this.clusters = numberOfClust;
    }
}
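This example stores each keyphrase in a string attribute created with new Attribute("words", (FastVector) null). Below is a minimal, self-contained sketch of the same pattern, assuming the Weka 3.6-era API used here (concrete weka.core.Instance and FastVector); the attribute name and sample strings are illustrative only.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class StringAttributeSketch {
    public static void main(String[] args) {
        // A single string attribute: a null FastVector marks the attribute as type string.
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("words", (FastVector) null));
        Instances data = new Instances("words", atts, 0);

        for (String s : new String[] { "alpha", "beta", "gamma" }) {
            Instance inst = new Instance(1);
            // setValue(Attribute, String) appends the string to the attribute's value pool
            // and stores its index in the instance.
            inst.setValue(data.attribute(0), s);
            data.add(inst);
        }
        System.out.println(data);
    }
}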
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
/**
 * Converts an output space point to a Weka instance.
 * @param point
 * @return
 */
public static Instance convertPointToInstance(OutputSpacePoint point) {
    Instance inst = new Instance(point.getInputSpacePoint().numberDimensions() + 1);
    int index = 0;
    for (String k : point.getInputSpacePoint().getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getInputSpacePoint().getValue(k));
    }
    inst.setValue(new Attribute(point.getKey(), index++), point.getValue());
    return inst;
}
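setValue(Attribute, double) writes to the position reported by Attribute.index(), which is why this code constructs each Attribute with the (name, index) constructor. A minimal sketch of the same idea, here taking the indexed attributes from a dataset header instead of building them ad hoc (attribute names and values are illustrative; Weka 3.6-style API assumed):

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class IndexedSetValueSketch {
    public static void main(String[] args) {
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("x1"));
        atts.addElement(new Attribute("x2"));
        atts.addElement(new Attribute("objective"));
        Instances header = new Instances("points", atts, 0);

        Instance inst = new Instance(header.numAttributes());
        // Attributes fetched from the header already carry their index,
        // so setValue(Attribute, double) knows which slot to fill.
        inst.setValue(header.attribute("x1"), 0.5);
        inst.setValue(header.attribute("x2"), 1.5);
        inst.setValue(header.attribute("objective"), 3.0);
        header.add(inst);
        System.out.println(header);
    }
}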
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
/**
 * Converts an input space point to a Weka instance.
 * @param point
 * @return
 */
public static Instance convertPointToInstance(InputSpacePoint point, OutputSpacePoint outputPoint) {
    Instance inst = new Instance(point.numberDimensions() + outputPoint.numberDimensions());
    int index = 0;
    for (String k : point.getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getValue(k));
    }
    for (Entry<String, Double> e : outputPoint.getOutputPoints().entrySet()) {
        if (e.getValue() == null) {
            inst.setMissing(index++);
        } else {
            Attribute att = new Attribute(e.getKey(), index++);
            inst.setValue(att, e.getValue());
        }
    }

    // assign instance to dataset
    FastVector att = new FastVector(point.numberDimensions() + 1);
    for (String s : point.getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    for (String k : outputPoint.getOutputPoints().keySet()) {
        att.addElement(new Attribute(k, index++));
    }
    Instances dataset = new Instances("instances", att, point.numberDimensions() + 1);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    inst.setDataset(dataset);
    return inst;
}
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
public static Instance convertPointToInstance(InputSpacePoint point) {
    Instance inst = new Instance(point.numberDimensions() + 1);
    int index = 0;
    for (String k : point.getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getValue(k));
    }
    inst.setMissing(index);

    // assign instance to dataset
    FastVector att = new FastVector(point.numberDimensions() + 1);
    for (String s : point.getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    att.addElement(new Attribute("objective", index++));
    Instances dataset = new Instances("instances", att, point.numberDimensions() + 1);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    inst.setDataset(dataset);
    return inst;
}
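The trailing setMissing(index) plus setDataset(...) and setClassIndex(...) prepares the instance for prediction: Weka classifiers expect a query instance that knows its dataset header and may have a missing class value. A minimal hedged sketch of that consumption side, assuming the Weka 3.6-era API; the classifier choice, attribute names, and training values are illustrative, not part of the original code.

import weka.classifiers.Classifier;
import weka.classifiers.functions.LinearRegression;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class PredictSketch {
    public static void main(String[] args) throws Exception {
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("x"));
        atts.addElement(new Attribute("objective"));
        Instances train = new Instances("points", atts, 0);
        train.setClassIndex(train.numAttributes() - 1);
        for (double x = 0; x < 5; x++) {
            Instance row = new Instance(2);
            row.setValue(train.attribute("x"), x);
            row.setValue(train.attribute("objective"), 2 * x + 1);
            train.add(row);
        }

        Classifier model = new LinearRegression();
        model.buildClassifier(train);

        // Query point: class value left missing, header attached via setDataset.
        Instance query = new Instance(2);
        query.setValue(train.attribute("x"), 10.0);
        query.setMissing(1);
        query.setDataset(train);
        System.out.println(model.classifyInstance(query));
    }
}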
From source file:graph.clustering.NodeClusterer.java
License:Apache License
private Instances convertNodesInfoToInstances(long[] ids) {
    GraphQuery graph = new GraphQuery(graphDb);
    Map<String, String[]> nodesInfo = graph.getNodesInfo(ids);
    String[] attributeNames = new String[nodesInfo.keySet().size()];

    // Declare the feature vector
    FastVector fvWekaAttributes = new FastVector(attributeNames.length);
    int attributeIndex = 0;
    for (String attributeName : nodesInfo.keySet()) {
        attributeNames[attributeIndex++] = attributeName;
        System.out.println("Attribute:\t" + attributeName);
        Set<String> valueSet = new HashSet<String>();
        boolean isStringAttribute = false;
        String[] attributes = nodesInfo.get(attributeName);
        for (int i = 0; i < ids.length; i++) {
            valueSet.add(attributes[i]);
            if (attributes[i].split("\\s").length > 1) {
                isStringAttribute = true;
            }
        }
        Attribute wekaAttribute = null;
        if (isStringAttribute) {
            wekaAttribute = new Attribute(attributeName, (FastVector) null);
        } else {
            // Declare a nominal attribute along with its values
            FastVector fvNominalVal = new FastVector(valueSet.size());
            for (String uniqueValue : valueSet) {
                fvNominalVal.addElement(uniqueValue.toLowerCase());
            }
            wekaAttribute = new Attribute(attributeName, fvNominalVal);
        }
        // add this new attribute type to the feature vector
        fvWekaAttributes.addElement(wekaAttribute);
    }

    // Create an empty training set
    Instances clusterTrainingSet = new Instances("Rel", fvWekaAttributes, ids.length);
    for (int i = 0; i < ids.length; i++) {
        // Create the instance
        Instance instance = new Instance(attributeNames.length);
        for (int j = 0; j < attributeNames.length; j++) {
            String attributeValue = nodesInfo.get(attributeNames[j])[i];
            if (attributeValue == null) {
                attributeValue = "none";
            } else {
                attributeValue = attributeValue.toLowerCase();
            }
            instance.setValue((Attribute) fvWekaAttributes.elementAt(j), attributeValue);
        }
        // add the instance to the data set
        clusterTrainingSet.add(instance);
    }
    return clusterTrainingSet;
}
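The distinction this example draws — setValue(Attribute, String) accepts any string for a string attribute but only pre-declared labels for a nominal one — is worth showing in isolation. A minimal sketch, assuming the Weka 3.6-era API; attribute names and labels are illustrative.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class NominalVsStringSketch {
    public static void main(String[] args) {
        FastVector labels = new FastVector();
        labels.addElement("red");
        labels.addElement("blue");

        FastVector atts = new FastVector();
        atts.addElement(new Attribute("color", labels));               // nominal
        atts.addElement(new Attribute("comment", (FastVector) null));  // string
        Instances data = new Instances("rel", atts, 0);

        Instance inst = new Instance(2);
        inst.setValue(data.attribute("color"), "red");  // must match a declared label
        inst.setValue(data.attribute("comment"), "any free text is accepted here");
        data.add(inst);

        // An undeclared nominal label would throw IllegalArgumentException:
        // inst.setValue(data.attribute("color"), "green");
        System.out.println(data);
    }
}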
From source file:graph_create_test.ARRF_DataBase.java
public void GenerateEdgesARFF_file(String pathARFF_DataBase, LinkedList<Edge> L_a_edges) {
    FastVector attributes = new FastVector();

    // create attributes (the first two edge attributes are skipped)
    LinkedList<G_Attribute> node_init = L_a_edges.get(0).EdgeAttList;
    for (int i = 2; i < node_init.size(); i++) {
        G_Attribute o = node_init.get(i);
        attributes.addElement(new Attribute(o.name));
    }

    // create Instances to fill data into
    trainingSet = new Instances("graph", attributes, 0);
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    // the same Instance object is reused; Instances.add() copies it on every call
    Instance instance = new DenseInstance(trainingSet.numAttributes());
    for (int i = 0; i < L_a_edges.size(); i++) {
        Edge edge = L_a_edges.get(i);
        for (int j = 2; j < edge.EdgeAttList.size(); j++) {
            G_Attribute v = edge.EdgeAttList.get(j);
            Float s = new Float(v.value);
            instance.setValue(j - 2, s);
        }
        trainingSet.add(instance);
    }
    System.err.println("data instances :\n" + trainingSet.toString());

    try {
        ArffSaver arffSaverInstance = new ArffSaver();
        arffSaverInstance.setInstances(trainingSet);
        arffSaverInstance.setFile(new File(pathARFF_DataBase));
        arffSaverInstance.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(Graph.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:graph_create_test.ARRF_DataBase.java
public void GenerateVertexARFF_file(String pathARFF_DataBase, LinkedList<Vertex> L_a_nodes) {
    FastVector attributes = new FastVector();

    // create attributes (the first vertex attribute is skipped)
    LinkedList<G_Attribute> node_init = L_a_nodes.get(0).VertexAttList;
    for (int i = 1; i < node_init.size(); i++) {
        G_Attribute o = node_init.get(i);
        attributes.addElement(new Attribute(o.name));
    }

    // create Instances to fill data into
    trainingSet = new Instances("graph", attributes, 0);
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    // the same Instance object is reused; Instances.add() copies it on every call
    Instance instance = new DenseInstance(trainingSet.numAttributes());
    for (int i = 0; i < L_a_nodes.size(); i++) {
        Vertex node = L_a_nodes.get(i);
        for (int j = 1; j < node.VertexAttList.size(); j++) {
            G_Attribute v = node.VertexAttList.get(j);
            Float s = new Float(v.value);
            instance.setValue(j - 1, s);
        }
        trainingSet.add(instance);
    }
    System.err.println("data instances :\n" + trainingSet.toString());

    try {
        ArffSaver arffSaverInstance = new ArffSaver();
        arffSaverInstance.setInstances(trainingSet);
        arffSaverInstance.setFile(new File(pathARFF_DataBase));
        arffSaverInstance.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(Graph.class.getName()).log(Level.SEVERE, null, ex);
    }
}
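Both methods above set values by position with setValue(int, double) rather than by Attribute. A minimal sketch of that index-based form, writing a small numeric dataset to ARFF; the attribute names, values, and output path are illustrative, and Weka 3.7+ (DenseInstance) is assumed.

import java.io.File;
import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

public class IndexSetValueSketch {
    public static void main(String[] args) throws Exception {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("weight"));
        atts.add(new Attribute("cost"));
        Instances data = new Instances("graph", atts, 0);

        double[][] rows = { { 1.0, 2.5 }, { 0.5, 4.0 } };
        for (double[] row : rows) {
            Instance inst = new DenseInstance(data.numAttributes());
            inst.setValue(0, row[0]); // by attribute index
            inst.setValue(1, row[1]);
            data.add(inst);
        }

        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File("/tmp/graph.arff")); // illustrative path
        saver.writeBatch();
    }
}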
From source file:IntruderDetector.DecisionTree.java
public void addIDSInstance(RealTimeFeature newRealTimeFeature) {
    Instance NewInstance = new Instance(26);
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(0), newRealTimeFeature.getDuration());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(1), newRealTimeFeature.getProtocol());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(2), newRealTimeFeature.getConnFlag());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(3), newRealTimeFeature.getNumPackets());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(4), newRealTimeFeature.getService());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(5), newRealTimeFeature.getNumTcpFin());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(6), newRealTimeFeature.getNumTcpSyn());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(7), newRealTimeFeature.getNumTcpReset());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(8), newRealTimeFeature.getNumTcpPush());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(9), newRealTimeFeature.getNumTcpAck());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(10), newRealTimeFeature.getNumTcpUrg());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(11), newRealTimeFeature.getNumPktSrc());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(12), newRealTimeFeature.getNumPktDst());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(13), newRealTimeFeature.getSrcBytes());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(14), newRealTimeFeature.getDstBytes());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(15), newRealTimeFeature.getTimeBaseFeature().getNumSrc());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(16), newRealTimeFeature.getTimeBaseFeature().getNumSrcSamePort());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(17), newRealTimeFeature.getTimeBaseFeature().getNumSrcDiffPort());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(18), newRealTimeFeature.getTimeBaseFeature().getNumSYNSameSrc());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(19), newRealTimeFeature.getTimeBaseFeature().getNumRSTSameSrc());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(20), newRealTimeFeature.getTimeBaseFeature().getNumDst());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(21), newRealTimeFeature.getTimeBaseFeature().getNumDstSamePort());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(22), newRealTimeFeature.getTimeBaseFeature().getNumDstDiffPort());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(23), newRealTimeFeature.getTimeBaseFeature().getNumSYNSameDst());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(24), newRealTimeFeature.getTimeBaseFeature().getNumRSTSameDst());
    NewInstance.setValue((Attribute) IDSRealtimeAttributeSet.elementAt(25), IDSmain.IDSmain.atktype);
    IDSmain.IDSmain.IDSInstances.add(NewInstance);
    // System.out.println(NewInstance.toString());
}
From source file:jjj.asap.sas.datasets.job.Import.java
License:Open Source License
private void buildDataset(int k, String input, String output) {
    if (IOUtils.exists(output)) {
        Job.log("NOTE", output + " already exists - nothing to do.");
        return;
    }

    // create empty dataset
    final DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    if (Contest.isMultiChoice(k)) {
        builder.addNominalVariable("color", Contest.COLORS);
    }
    builder.addStringVariable("text");
    builder.addNominalVariable("score", Contest.getRubrics(k));
    Instances dataset = builder.getDataset(IOUtils.getName(output));

    // now add obs
    Iterator<String> it = new FileIterator(input);
    while (it.hasNext()) {
        // parse data
        String[] data = StringUtils.safeSplit(it.next(), "\t", 6);
        double id = Double.parseDouble(data[0]);
        String score = data[2];
        String color = data[4];
        String text = data[5];

        // add to dataset
        dataset.add(new DenseInstance(dataset.numAttributes()));
        Instance ob = dataset.lastInstance();
        ob.setValue(dataset.attribute("id"), id);
        if (Contest.isMultiChoice(k)) {
            ob.setValue(dataset.attribute("color"), color);
        }
        ob.setValue(dataset.attribute("text"), text);
        if ("?".equals(score)) {
            ob.setValue(dataset.attribute("score"), Utils.missingValue());
        } else {
            ob.setValue(dataset.attribute("score"), score);
        }
    }
    Dataset.save(output, dataset);
}
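Two idioms in this example are worth isolating: the row is appended first with dataset.add(new DenseInstance(...)) and then filled in through dataset.lastInstance(), and an unknown score is written as Utils.missingValue(). A minimal sketch of both, assuming Weka 3.7+; the relation name, attribute names, and sample values are illustrative stand-ins for the project's DatasetBuilder output.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;

public class AddThenFillSketch {
    public static void main(String[] args) {
        ArrayList<String> scores = new ArrayList<String>();
        scores.add("0");
        scores.add("1");

        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("id"));
        atts.add(new Attribute("text", (ArrayList<String>) null)); // string attribute
        atts.add(new Attribute("score", scores));                  // nominal attribute
        Instances dataset = new Instances("essays", atts, 0);

        // Append an empty row, then fill it in place via lastInstance().
        dataset.add(new DenseInstance(dataset.numAttributes()));
        Instance ob = dataset.lastInstance();
        ob.setValue(dataset.attribute("id"), 42.0);
        ob.setValue(dataset.attribute("text"), "some answer text");
        ob.setValue(dataset.attribute("score"), Utils.missingValue()); // unknown label
        System.out.println(dataset);
    }
}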
From source file:jjj.asap.sas.parser.job.ImportParserData.java
License:Open Source License
private void process(final String parent, int essaySet, Map<Double, List<String>> tags,
        Map<Double, List<String>> parseTrees, Map<Double, List<String>> depends) {

    // check if output exists
    boolean any = false;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-extra-stats.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-pos-tags.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-parse-tree.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends0.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends1.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends2.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends3.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends4.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends5.arff")) any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends6.arff")) any = true;
    if (!any) {
        Job.log("NOTE", "work/datasets/" + parent + "/" + essaySet
                + "-*.arff returns all required datasets - nothing to do");
        return;
    }

    // Load an existing dataset to use as a template.
    Instances dataset = Dataset.load("work/datasets/" + parent + "/" + essaySet + "-spell-checked.arff");

    // create the output datasets here. except for the extra statistics,
    // the format is the same as 'dataset'.
    Instances tagsData = new Instances(dataset, 0);
    tagsData.setRelationName(essaySet + "-pos-tags.arff");
    Instances treeData = new Instances(dataset, 0);
    treeData.setRelationName(essaySet + "-parse-tree.arff");
    Instances dependsData[] = new Instances[7];
    for (int j = 0; j < 7; j++) {
        dependsData[j] = new Instances(dataset, 0);
        dependsData[j].setRelationName(essaySet + "-depends" + j + ".arff");
    }

    // extra stats
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    if (Contest.isMultiChoice(essaySet)) {
        builder.addNominalVariable("color", Contest.COLORS);
    }
    builder.addVariable("x_sent");
    builder.addVariable("x_para");
    builder.addVariable("x_length");
    builder.addVariable("x_words");
    builder.addVariable("x_unique_words");
    builder.addNominalVariable("score", Contest.getRubrics(essaySet));
    Instances extraStats = builder.getDataset(essaySet + "-extra-stats.arff");

    // now add rows for each instance
    for (int i = 0; i < dataset.numInstances(); i++) {

        // common variables
        Instance ob = dataset.instance(i);
        double id = ob.value(0);
        String y = ob.isMissing(dataset.numAttributes() - 1) ? null
                : ob.stringValue(dataset.numAttributes() - 1);
        String color = Contest.isMultiChoice(essaySet) ? ob.stringValue(dataset.attribute("color")) : null;
        String str = ob.stringValue(dataset.attribute("text"));

        // Extra stats
        int nSent = tags.containsKey(id) ? tags.get(id).size() : 0;
        int nPara = 0;
        for (int a = 0; a < str.length(); a++) {
            if (str.charAt(a) == '^') nPara++;
        }
        int nLength = str.length();
        int nWords = 0;
        int nUniqueWords = 0;
        String[] words = str.toLowerCase().split(" ");
        nWords = words.length;
        Set<String> u = new HashSet<String>();
        for (String w : words) {
            u.add(w);
        }
        nUniqueWords = u.size();

        extraStats.add(new DenseInstance(extraStats.numAttributes()));
        Instance extra = extraStats.lastInstance();
        extra.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            extra.setValue(1, color);
        }
        extra.setValue(extraStats.attribute("x_sent"), nSent);
        extra.setValue(extraStats.attribute("x_para"), nPara);
        extra.setValue(extraStats.attribute("x_length"), nLength);
        extra.setValue(extraStats.attribute("x_words"), nWords);
        extra.setValue(extraStats.attribute("x_unique_words"), nUniqueWords);
        if (y == null)
            extra.setValue(extraStats.numAttributes() - 1, Utils.missingValue());
        else
            extra.setValue(extraStats.numAttributes() - 1, y);

        // POS tags
        String tagsText = "";
        List<String> tagsList = tags.get(id);
        if (tagsList == null || tagsList.isEmpty()) {
            Job.log("WARNING", "no tags for " + id);
            tagsText = "x";
        } else {
            for (String tagsItem : tagsList) {
                tagsText += tagsItem;
            }
        }

        tagsData.add(new DenseInstance(ob.numAttributes()));
        Instance tagsOb = tagsData.lastInstance();
        tagsOb.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            tagsOb.setValue(1, color);
            tagsOb.setValue(2, tagsText.trim());
            if (y == null) {
                tagsOb.setValue(3, Utils.missingValue());
            } else {
                tagsOb.setValue(3, y);
            }
        } else {
            tagsOb.setValue(1, tagsText.trim());
            if (y == null) {
                tagsOb.setValue(2, Utils.missingValue());
            } else {
                tagsOb.setValue(2, y);
            }
        }

        // Parse Tree
        String treeText = "";
        List<String> treeList = parseTrees.get(id);
        if (treeList == null || treeList.isEmpty()) {
            Job.log("WARNING", "no parse tree for " + id);
            treeText = "x";
        } else {
            for (String treeItem : treeList) {
                treeText += treeItem;
            }
        }

        treeData.add(new DenseInstance(ob.numAttributes()));
        Instance treeOb = treeData.lastInstance();
        treeOb.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            treeOb.setValue(1, color);
            treeOb.setValue(2, treeText.trim());
            if (y == null) {
                treeOb.setValue(3, Utils.missingValue());
            } else {
                treeOb.setValue(3, y);
            }
        } else {
            treeOb.setValue(1, treeText.trim());
            if (y == null) {
                treeOb.setValue(2, Utils.missingValue());
            } else {
                treeOb.setValue(2, y);
            }
        }

        // Depends data
        for (int j = 0; j < 7; j++) {
            String text = "";
            List<String> list = depends.get(id);
            if (list == null || list.isEmpty()) {
                Job.log("WARNING", "no depends for " + id);
                text = "x";
            } else {
                for (String item : list) {
                    String[] term = StringUtils.safeSplit(item, "/", 3);
                    switch (j) {
                    case 0: text += item; break;
                    case 1: text += term[1] + "/" + term[2]; break;
                    case 2: text += term[0] + "/" + term[2]; break;
                    case 3: text += term[0] + "/" + term[1]; break;
                    case 4: text += term[0]; break;
                    case 5: text += term[1]; break;
                    case 6: text += term[2]; break;
                    }
                    text += " ";
                }
            }

            dependsData[j].add(new DenseInstance(ob.numAttributes()));
            Instance dependsOb = dependsData[j].lastInstance();
            dependsOb.setValue(0, id);
            if (Contest.isMultiChoice(essaySet)) {
                dependsOb.setValue(1, color);
                dependsOb.setValue(2, text.trim());
                if (y == null) {
                    dependsOb.setValue(3, Utils.missingValue());
                } else {
                    dependsOb.setValue(3, y);
                }
            } else {
                dependsOb.setValue(1, text.trim());
                if (y == null) {
                    dependsOb.setValue(2, Utils.missingValue());
                } else {
                    dependsOb.setValue(2, y);
                }
            }
        } // j
    } // dataset

    // Now save the new datasets
    Dataset.save("work/datasets/" + parent + "/" + tagsData.relationName(), tagsData);
    Dataset.save("work/datasets/" + parent + "/" + treeData.relationName(), treeData);
    for (int j = 0; j < 7; j++) {
        Dataset.save("work/datasets/" + parent + "/" + dependsData[j].relationName(), dependsData[j]);
    }
    Dataset.save("work/datasets/" + parent + "/" + extraStats.relationName(), extraStats);
}