List of usage examples for weka.core.Instance.setValue
public void setValue(Attribute att, String value);
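Before the collected examples, a minimal self-contained sketch of the call (assuming Weka 3.7+, where concrete instances are created via DenseInstance; the dataset and attribute names below are made up for illustration). Note that the String overload applies only to nominal or string attributes; numeric attributes use the double overload.

    import java.util.ArrayList;

    import weka.core.Attribute;
    import weka.core.DenseInstance;
    import weka.core.Instance;
    import weka.core.Instances;

    public class SetValueDemo {
        public static void main(String[] args) {
            // Hypothetical two-attribute dataset: one numeric, one nominal.
            ArrayList<String> labels = new ArrayList<>();
            labels.add("yes");
            labels.add("no");

            ArrayList<Attribute> atts = new ArrayList<>();
            atts.add(new Attribute("length"));        // numeric attribute
            atts.add(new Attribute("label", labels)); // nominal attribute

            Instances data = new Instances("demo", atts, 1);
            data.setClassIndex(data.numAttributes() - 1);

            Instance inst = new DenseInstance(data.numAttributes());
            inst.setDataset(data);               // good practice before setting values
            inst.setValue(atts.get(0), 4.2);     // double overload for numeric attributes
            inst.setValue(atts.get(1), "yes");   // setValue(Attribute, String) for nominal
            data.add(inst);

            System.out.println(data);
        }
    }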
From source file:ocr.ARFFSymbolFilter.java
License:Apache License
public static void writeWeka(final String filenameout, final ArrayList<?> symbolData) {
    final int nsold = ARFFSymbolFilter.ns;
    ARFFSymbolFilter.tangent = (ARFFSymbolFilter.times > 1);
    try {
        if (!ARFFSymbolFilter.strokenumber) {
            ARFFSymbolFilter.ns = 1;
        }
        final DataOutputStream[] fileout = new DataOutputStream[ARFFSymbolFilter.ns];
        final Instances[] instances = new Instances[ARFFSymbolFilter.ns];
        System.out.println("Writing file");
        for (int i = 0; i < ARFFSymbolFilter.ns; ++i) {
            final int k = ARFFSymbolFilter.strokenumber ? i : (nsold - 1);
            fileout[ARFFSymbolFilter.strokenumber ? i : 0] = new DataOutputStream(new FileOutputStream(
                    filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#"));
        }
        final int tot = symbolData.size();
        for (int j = 0; j < symbolData.size(); ++j) {
            final ArrayList<?> group = (ArrayList<?>) symbolData.get(j);
            for (int i = 0; i < group.size(); ++i) {
                final Symbol sym = (Symbol) group.get(i);
                final int k = ARFFSymbolFilter.strokenumber ? (sym.size() - 1) : 0;
                if (sym.name.equals("no_name") || sym.name.equals("empty_symbol")) {
                    System.out.print("#" + sym.name + "#");
                } else {
                    for (int t = 0; t < ARFFSymbolFilter.times; ++t) {
                        final String line = constructStringInstance(sym, ARFFSymbolFilter.alpha);
                        if (line == null) {
                            System.out.print("line=null!");
                        } else {
                            if (instances[k] == null) {
                                // Lazily build the ARFF header from the attribute
                                // names found in the first instance line.
                                final StringTokenizer st = new StringTokenizer(line, " ");
                                final int nt = st.countTokens() / 2;
                                final FastVector att = new FastVector();
                                for (int kk = 0; kk < nt; ++kk) {
                                    final String token = st.nextToken();
                                    att.addElement(new Attribute(new String(token)));
                                    st.nextToken();
                                }
                                // A null value list makes "class" a string attribute.
                                att.addElement(new Attribute("class", (FastVector) null));
                                (instances[k] = new Instances("Symbols of Size " + (k + 1), att, 1))
                                        .setClassIndex(att.size() - 1);
                            }
                            // Parse the value tokens and fill the instance.
                            final StringTokenizer st = new StringTokenizer(line, " ");
                            final int nt = st.countTokens() / 2;
                            final Instance inst = new Instance(nt + 1);
                            for (int kk = 0; kk < nt; ++kk) {
                                st.nextToken();
                                final String token = new String(st.nextToken());
                                inst.setValue(kk, Double.parseDouble(token));
                            }
                            inst.setDataset(instances[k]);
                            inst.setClassValue(oldReplace(sym.name, "\\", ""));
                            instances[k].add(inst);
                        }
                    }
                }
            }
            if ((int) (100.0 * j) / tot % 10 == 0) {
                System.out.print((int) (100.0 * j) / tot + "%-");
            }
        }
        // Write each dataset to its temporary ".arff#" file.
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            if (fileout[ARFFSymbolFilter.strokenumber ? k : 0] == null) {
                System.out.println("fo" + fileout[ARFFSymbolFilter.strokenumber ? k : 0]);
            }
            if (instances[ARFFSymbolFilter.strokenumber ? k : 0] == null) {
                System.out.println("in:" + instances[ARFFSymbolFilter.strokenumber ? k : 0]);
            }
            fileout[ARFFSymbolFilter.strokenumber ? k : 0]
                    .writeBytes(instances[ARFFSymbolFilter.strokenumber ? k : 0].toString());
            fileout[ARFFSymbolFilter.strokenumber ? k : 0].close();
        }
        // Convert the string class attribute to nominal, writing the final
        // ".arff" files, then delete the temporary ones.
        final StringToNominal filter = new StringToNominal();
        final String[] args = new String[4];
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            args[0] = "-i";
            args[1] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#";
            args[2] = "-o";
            args[3] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff";
            Filter.filterFile(filter, args);
            new File(args[1]).delete();
        }
        System.out.println("100.0%");
    } catch (FileNotFoundException fnfe) {
        fnfe.printStackTrace();
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}
From source file:org.conqat.engine.commons.machine_learning.InstanceCreatorBase.java
License:Apache License
/** Creates a Weka instance for the given classification object. */
public weka.core.Instance createWekaInstance(T classificationObject, LABEL label) {
    // Create an empty instance. Its size corresponds to the number of
    // features plus the class label.
    weka.core.Instance instance = new weka.core.Instance(features.size() + 1);

    // Calculate and set the feature values.
    for (int i = 0; i < features.size(); i++) {
        instance.setValue((Attribute) attributes.elementAt(i),
                features.get(i).getValue(classificationObject));
    }

    // Set the instance label.
    Attribute classAttribute = (Attribute) attributes.elementAt(attributes.size() - 1);
    instance.setValue(classAttribute, label.name());
    return instance;
}
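A hypothetical call site, assuming the surrounding class is generic as InstanceCreatorBase<T, LABEL> and instantiating it with made-up MyObject and MyLabel types (note this example uses the pre-3.7 Weka API, new weka.core.Instance(...)):

    // Both type arguments and the concrete subclass are illustrative only.
    InstanceCreatorBase<MyObject, MyLabel> creator = /* some concrete subclass */;
    weka.core.Instance inst = creator.createWekaInstance(myObject, MyLabel.POSITIVE);

Since the label is written via setValue(classAttribute, label.name()), the class attribute must be nominal or string, and for a nominal class every enum name must appear among its declared values.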
From source file:org.dkpro.similarity.ml.filters.LogFilter.java
License:Open Source License
@Override
protected Instance process(Instance inst) throws Exception {
    Instance newInst = new DenseInstance(inst.numAttributes());

    // Copy the first attribute unchanged.
    newInst.setValue(0, inst.value(0));

    // Log-scale all inner attributes.
    for (int i = 1; i < inst.numAttributes() - 1; i++) {
        double newVal = Math.log(inst.value(i) + 1);
        // double newVal = inst.value(i); // Passthrough
        newInst.setValue(i, newVal);
    }

    // Copy the last attribute (the class) unchanged.
    newInst.setValue(inst.numAttributes() - 1, inst.value(inst.numAttributes() - 1));

    return newInst;
}
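A minimal sketch of applying this filter to a dataset, assuming it follows the standard weka.filters batch protocol (the variable data stands for some weka.core.Instances):

    LogFilter logFilter = new LogFilter();
    logFilter.setInputFormat(data);        // establish the output header first
    Instances logScaled = Filter.useFilter(data, logFilter);

The filter copies the first and last attributes unchanged and maps every inner value v to log(v + 1).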
From source file:org.gc4mir.core.Core.java
public void addArffClasses() throws IOException, Exception {
    // Open the class-less ARFF file.
    BufferedReader reader = new BufferedReader(new FileReader("export/autoweka/clean.arff"));
    Instances data = new Instances(reader);
    reader.close();

    // Add the class attribute: create a filter that appends a nominal attribute.
    Add filter = new Add();
    filter.setAttributeIndex("last");

    // Collect the category names for the attribute header.
    String nominalLabels = "";
    for (int i = 0; i < getBaseCategories().size(); i++) {
        Category current = getBaseCategories().getCategories().get(i);
        nominalLabels += current.getId();
        if (i < getBaseCategories().size() - 1) {
            nominalLabels += ",";
        }
    }
    System.out.println("");
    filter.setNominalLabels(nominalLabels);
    filter.setAttributeName("Category");
    filter.setInputFormat(data);
    data = Filter.useFilter(data, filter);

    // Set the class value of each instance.
    int classindex = data.numAttributes() - 1;
    for (int i = 0, n = 0; i < getBaseCategories().size(); i++) {
        Category current = getBaseCategories().getCategories().get(i);
        for (int k = 0; k < current.getRecordings().size(); k++) {
            Instance inst = data.instance(n);
            inst.setValue(classindex, current.getId());
            n++;
        }
    }

    // Save the ARFF file.
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(new File("export/autoweka/train.arff"));
    saver.writeBatch();
    System.out.println("File successfully saved, you can continue now");
}
From source file:org.hypknowsys.wumprep.WUMprepWrapper.java
License:Open Source License
/**
 * Creates a dummy dataset from the input format, sends it to the script, and
 * reads the script output's ARFF information, which in turn is used to set
 * <code>this</code>' output format.
 *
 * This mechanism allows a WUMprep script to alter the recordset layout as
 * long as the change is documented by the output ARFF header. For example,
 * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field
 * to <code>host_dns</code> when performing IP lookups.
 *
 * @param instanceInfo
 *          The input format.
 * @return Object containing the output instance structure.
 */
public Instances getScriptOutputFormat(Instances instanceInfo) {
    Instances outputFormat = instanceInfo;
    Instances testData = new Instances(instanceInfo);
    Instance testInstance = new Instance(testData.numAttributes());

    testData.delete();
    testInstance.setDataset(testData);

    // Initialize the testInstance's attribute values with plausible dummies.
    for (int i = 0; i < testInstance.numAttributes(); i++) {
        String aName = testInstance.attribute(i).name();
        if (aName.equals("host_ip"))
            testInstance.setValue(i, "127.0.0.1");
        else if (aName.equals("ts_day"))
            testInstance.setValue(i, "01");
        else if (aName.equals("ts_month"))
            testInstance.setValue(i, "Jan");
        else if (aName.equals("ts_year"))
            testInstance.setValue(i, "2005");
        else if (aName.equals("ts_hour"))
            testInstance.setValue(i, "11");
        else if (aName.equals("ts_minutes"))
            testInstance.setValue(i, "55");
        else if (aName.equals("ts_seconds"))
            testInstance.setValue(i, "00");
        else if (aName.equals("tz"))
            testInstance.setValue(i, "+0200");
        else
            testInstance.setValue(i, aName + "-dummy");
    }
    testData.add(testInstance);

    WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args);
    testWrapper.start();
    testWrapper.push(testData.toString());
    testWrapper.push((Instance) null);

    /** Helper class for reading stderr output from the WUMprep script. */
    class ErrorReader extends Thread implements Serializable {
        private static final long serialVersionUID = -488779846603045891L;

        PipedReader m_input = null;

        /**
         * @param input The script wrapper's stderr pipe reader
         */
        ErrorReader(PipedReader input) {
            m_input = input;
            this.start();
        }

        public void run() {
            try {
                // Drain the pipe, discarding its contents.
                while (m_input.read() >= 0)
                    ;
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    // Read and ignore the stderr output.
    new ErrorReader(testWrapper.getErrorPipe());

    try {
        // Read the script's output and use its ARFF header as the new format.
        outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe());
    } catch (IOException e) {
        e.printStackTrace();
    }

    return outputFormat;
}
From source file:org.iobserve.analysis.behavior.clustering.hierarchical.ElbowMethod.java
License:Apache License
/**
 * Calculates the within-cluster sum-of-squares (WSS) for a given cluster.
 *
 * @param cluster
 *            Calculate the WSS for this cluster.
 * @return WSS
 **/
public double calcWSS(final List<Integer> cluster) {
    final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction();
    final double[] sumAttValues = new double[this.instances.numAttributes()];

    // Sum up the attribute values of all instances in the cluster.
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        for (int j = 0; j < this.instances.numAttributes(); j++) {
            sumAttValues[j] += instance.value(j);
        }
    }

    // Average each attribute value.
    for (int j = 0; j < sumAttValues.length; j++) {
        sumAttValues[j] /= cluster.size();
    }

    /*
     * Create a centroid of this cluster by setting the cluster's average
     * attribute values as its own.
     */
    final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy();
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        centroid.setValue(j, sumAttValues[j]);
    }

    // Sum up the squared distances of each data point to the centroid.
    double clusterWSS = 0.0;
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        clusterWSS += Math.pow(distanceFunction.distance(centroid, instance), 2);
    }
    return clusterWSS;
}
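For reference, in the notation of the code the quantity computed here is

    \mathrm{WSS}(C) = \sum_{x \in C} d(c, x)^2, \qquad c_j = \frac{1}{|C|} \sum_{x \in C} x_j,

where d is the clusterer's configured distance function and c is the attribute-wise mean (centroid) of cluster C.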
From source file:org.iobserve.analysis.behavior.clustering.hierarchical.GapStatisticMethod.java
License:Apache License
/**
 * Calculates the error sum-of-squares (ESS) for a given cluster.
 *
 * @param cluster
 *            Calculate the ESS for this cluster.
 * @return ESS
 **/
public double calcESS(final List<Integer> cluster) {
    if ((cluster.size() == 0) || (cluster.size() == 1)) {
        return 0.0;
    }
    final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction();
    final double[] sumAttValues = new double[this.instances.numAttributes()];

    // Sum up the attribute values of all instances in the cluster.
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        for (int j = 0; j < this.instances.numAttributes(); j++) {
            sumAttValues[j] += instance.value(j);
        }
    }

    // Average each attribute value.
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        sumAttValues[j] /= cluster.size();
    }

    /*
     * Create a centroid of this cluster by setting the cluster's average
     * attribute values as its own.
     */
    final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy();
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        centroid.setValue(j, sumAttValues[j]);
    }

    // Sum up the distances of each data point to the centroid, then average.
    double clusterESS = 0.0;
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        clusterESS += distanceFunction.distance(centroid, instance);
    }
    return clusterESS / cluster.size();
}
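Note that, as implemented, the returned quantity is the mean (unsquared) distance to the centroid,

    \mathrm{ESS}(C) = \frac{1}{|C|} \sum_{x \in C} d(c, x),

rather than the textbook sum of squared deviations; c is again the attribute-wise mean of cluster C.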
From source file:org.iobserve.analysis.behavior.karlsruhe.AbstractClustering.java
License:Apache License
/**
 * Transforms the user sessions (in the form of counts of their called
 * operation signatures) into Weka instances that can be used for clustering.
 *
 * @param countModel
 *            contains the user sessions in the form of counts of called
 *            operation signatures
 * @param listOfDistinctOperationSignatures
 *            contains the extracted distinct operation signatures of the
 *            input entryCallSequenceModel
 * @return the Weka instances that hold the data used for the clustering
 */
protected Instances createInstances(final List<UserSessionAsCountsOfCalls> countModel,
        final List<String> listOfDistinctOperationSignatures) {

    // One numeric attribute per distinct operation signature.
    final int numberOfDistinctOperationSignatures = listOfDistinctOperationSignatures.size();
    final FastVector fvWekaAttributes = new FastVector(numberOfDistinctOperationSignatures);
    for (int i = 0; i < numberOfDistinctOperationSignatures; i++) {
        final String attributeName = "Attribute" + i;
        final Attribute attribute = new Attribute(attributeName);
        fvWekaAttributes.addElement(attribute);
    }

    final Instances clusterSet = new Instances("CallCounts", fvWekaAttributes, countModel.size());

    // One instance per user session, holding its absolute call counts.
    for (final UserSessionAsCountsOfCalls userSession : countModel) {
        int indexOfAttribute = 0;
        final Instance instance = new Instance(numberOfDistinctOperationSignatures);
        for (int row = 0; row < listOfDistinctOperationSignatures.size(); row++) {
            instance.setValue((Attribute) fvWekaAttributes.elementAt(indexOfAttribute),
                    userSession.getAbsoluteCountOfCalls()[row]);
            indexOfAttribute++;
        }
        clusterSet.add(instance);
    }
    return clusterSet;
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void buildClassifier(ContourDataGrid cData, VectorDataList bgData) throws Exception {
    // Transform the input data to Weka multi-instance (MI) instances.
    m_data = initDataset(cData.numFeatures(), 2, cData.totalLength() + bgData.numVectors(), cData.width());

    // One bag per contour row, containing all vectors of that row.
    for (int r = 0; r < cData.totalLength(); r++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        for (int c = 0; c < cData.width(); c++) {
            int vecIdx = cData.getVectorIdx(c, r);
            Instance inst = new DenseInstance(cData.weight(vecIdx), cData.getVector(vecIdx));
            inst.setDataset(bagData);
            bagData.add(inst);
        }
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, r);     // bag id
        newBag.setValue(2, 1);     // class attribute
        newBag.setValue(1, value); // relational (bag) attribute
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }

    // One single-vector bag per background vector.
    for (int i = 0; i < bgData.numVectors(); i++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        Instance inst = new DenseInstance(bgData.weight(i), bgData.getVector(i));
        inst.setDataset(bagData);
        bagData.add(inst);
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, cData.totalLength() + i); // bag id
        newBag.setValue(2, 0);                       // class attribute
        newBag.setValue(1, value);                   // relational (bag) attribute
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }

    m_classifier.buildClassifier(m_data);
}
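The helper initDataset(...) is not shown in this excerpt. As context, here is a plausible sketch of the standard Weka multi-instance header it would have to build (bag id, relational bag attribute, class); all names and sizes below are assumptions, and imports from java.util and weka.core are implied:

    // Hypothetical reconstruction of the MI header; not the project's actual code.
    int numFeatures = 4;   // stands in for cData.numFeatures()
    int capacity = 100;    // stands in for the expected number of bags

    // Inner header: one numeric attribute per feature of a bag member.
    ArrayList<Attribute> innerAtts = new ArrayList<>();
    for (int f = 0; f < numFeatures; f++) {
        innerAtts.add(new Attribute("feat" + f));
    }
    Instances relationalHeader = new Instances("bag-contents", innerAtts, 0);

    // Outer MI layout: bag id (0), relational bag attribute (1), class (2).
    ArrayList<Attribute> atts = new ArrayList<>();
    atts.add(new Attribute("bag_id"));
    atts.add(new Attribute("bag", relationalHeader));
    atts.add(new Attribute("class", new ArrayList<>(Arrays.asList("0", "1"))));
    Instances miData = new Instances("mi-data", atts, capacity);
    miData.setClassIndex(2);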
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public double contourProbability(double[] inst) throws Exception {
    // Wrap the feature vector in a single-instance bag.
    Instances bagData = new Instances(m_data.attribute(1).relation(), 1);
    Instance i = new DenseInstance(1, inst);
    i.setDataset(bagData);
    bagData.add(i);

    Instance bag = new DenseInstance(3);
    bag.setDataset(m_data);
    int val = bag.attribute(1).addRelation(bagData);
    bag.setValue(1, val);

    // Probability of the "contour" class (index 1).
    return m_classifier.distributionForInstance(bag)[1];
}