List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
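Instances.add appends a (shallow) copy of the given Instance to the end of the dataset and returns true, per the java.util.List contract. Before calling it you need a dataset header: a list of Attribute objects passed to the Instances constructor. A minimal sketch of the pattern the examples below follow, using the Weka >= 3.7 API (ArrayList and DenseInstance; some examples below use the older FastVector/Instance API from Weka 3.6). All attribute and value names here are illustrative only:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) {
        // Define the header: one numeric attribute plus one nominal class attribute
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length")); // numeric
        ArrayList<String> classVals = new ArrayList<String>();
        classVals.add("yes");
        classVals.add("no");
        atts.add(new Attribute("class", classVals)); // nominal

        Instances data = new Instances("demo", atts, 0); // capacity 0, grows on add
        data.setClassIndex(data.numAttributes() - 1);

        // Build one instance and append it; add() copies it into the dataset
        Instance inst = new DenseInstance(2); // 2 values, weight defaults to 1.0
        inst.setValue(atts.get(0), 4.2);
        inst.setValue(atts.get(1), "yes");
        data.add(inst);

        System.out.println(data);
    }
}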
From source file:com.reactivetechnologies.analytics.mapper.TEXTDataMapper.java
License:Open Source License
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
    if (request != null && request.getData() != null && request.getData().length > 0) {
        FastVector fvWekaAttributes = new FastVector(2);
        FastVector nil = null;
        Attribute attr0 = new Attribute("text", nil, 0);
        FastVector fv = new FastVector();
        for (String nominal : request.getClassVars()) {
            fv.addElement(nominal);
        }
        Attribute attr1 = new Attribute("class", fv, 1);
        fvWekaAttributes.addElement(attr0);
        fvWekaAttributes.addElement(attr1);
        Instances ins = new Instances("attr-reln", fvWekaAttributes, request.getData().length);
        ins.setClassIndex(1);
        for (Text s : request.getData()) {
            Instance i = new Instance(2);
            i.setValue(attr0, s.getText());
            i.setValue(attr1, s.getTclass());
            ins.add(i);
        }
        return new Dataset(ins);
    }
    return null;
}
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/**
 * FAST HACK REMOVING FUNCTIONALITIES FROM WEKA ORIGINAL METHOD!
 *
 * @param rs
 * @return
 * @throws SQLException
 */
public static Instances retrieveInstanceFromResultSet(ResultSet rs) throws SQLException {
    ResultSetMetaData md = rs.getMetaData();

    // Determine structure of the instances
    int numAttributes = md.getColumnCount();
    int[] attributeTypes = new int[numAttributes];
    Hashtable[] nominalIndexes = new Hashtable[numAttributes];
    FastVector[] nominalStrings = new FastVector[numAttributes];
    for (int i = 1; i <= numAttributes; i++) {
        attributeTypes[i - 1] = Attribute.NUMERIC;
    }

    // For sqlite: cache column names, because the last while (rs.next())
    // iteration over the tuples below will close the md object
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
        columnNames.add(md.getColumnName(i + 1));
    }

    // Step through the tuples
    FastVector instances = new FastVector();
    int rowCount = 0;
    while (rs.next()) {
        double[] vals = new double[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
            int in = rs.getInt(i);
            if (rs.wasNull()) {
                vals[i - 1] = Instance.missingValue();
            } else {
                vals[i - 1] = in;
            }
        }
        // Add one instance per row, after all column values have been filled in
        Instance newInst = new Instance(1.0, vals);
        instances.addElement(newInst);
        rowCount++;
    }
    // disconnectFromDatabase(); (perhaps other queries might be made)

    // Create the header and add the instances to the dataset
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < numAttributes; i++) {
        /* Fix for databases that uppercase column names */
        // String attribName = attributeCaseFix(md.getColumnName(i + 1));
        String attribName = columnNames.get(i);
        switch (attributeTypes[i]) {
        case Attribute.NOMINAL:
            attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            Attribute att = new Attribute(attribName, (FastVector) null);
            attribInfo.addElement(att);
            for (int n = 0; n < nominalStrings[i].size(); n++) {
                att.addStringValue((String) nominalStrings[i].elementAt(n));
            }
            break;
        case Attribute.DATE:
            attribInfo.addElement(new Attribute(attribName, (String) null));
            break;
        default:
            throw new SQLException("Unknown attribute type");
        }
    }
    Instances result = new Instances("QueryResult", attribInfo, instances.size());
    for (int i = 0; i < instances.size(); i++) {
        result.add((Instance) instances.elementAt(i));
    }
    rs.close();
    return result;
}
From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java
License:LGPL
public static int customKnnClassifier(double[] newInstanceArray) {
    ts1 = new Timestamp(System.currentTimeMillis());
    int activityId = 0;
    String classLabel = "";

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("STANDING");
    classVal.add("SITTING");
    classVal.add("LYINGDOWN");
    classVal.add("WALKING");
    atts.add(new Attribute("class", classVal));
    atts.add(new Attribute("1_1_2_1"));
    atts.add(new Attribute("1_1_3_1"));
    atts.add(new Attribute("1_1_9_2"));
    atts.add(new Attribute("2_1_3_1"));
    atts.add(new Attribute("2_1_4_1"));
    atts.add(new Attribute("2_1_9_2"));

    Instances dataUnlabeled = new Instances("TestInstances", atts, 0);
    dataUnlabeled.add(new DenseInstance(1.0, newInstanceArray));
    dataUnlabeled.setClassIndex(0);
    try {
        activityId = (int) MonitoringWorkerThread.cls.classifyInstance(dataUnlabeled.firstInstance());
        classLabel = dataUnlabeled.firstInstance().classAttribute().value(activityId);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ts = new Timestamp(System.currentTimeMillis());
    // Log.e("classifyActivity Knn", " -> Impiegati: " + (ts.getTime() - ts1.getTime()) + " ms;\n");
    return getActivityIDofClassLabel(classLabel);
}
From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java
License:LGPL
public static int customJRipClassifier(double[] newInstanceArray) {
    ts1 = new Timestamp(System.currentTimeMillis());
    int activityId = 0;
    String classLabel = "";

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("STANDING");
    classVal.add("WALKING");
    classVal.add("SITTING");
    classVal.add("LYINGDOWN");
    atts.add(new Attribute("class", classVal));
    atts.add(new Attribute("1_1_2_1"));
    atts.add(new Attribute("1_1_3_1"));
    atts.add(new Attribute("1_1_9_2"));
    atts.add(new Attribute("2_1_3_1"));
    atts.add(new Attribute("2_1_4_1"));
    atts.add(new Attribute("2_1_9_2"));

    Instances dataUnlabeled = new Instances("TestInstances", atts, 0);
    dataUnlabeled.add(new DenseInstance(1.0, newInstanceArray));
    dataUnlabeled.setClassIndex(0);
    try {
        activityId = (int) MonitoringWorkerThread.cls.classifyInstance(dataUnlabeled.firstInstance());
        Log.i("classifyActivity JRip ---->", activityId + "");
        classLabel = dataUnlabeled.firstInstance().classAttribute().value(activityId);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ts = new Timestamp(System.currentTimeMillis());
    // Log.i("classifyActivity JRip", " -> Impiegati: " + (ts.getTime() - ts1.getTime()) + " ms;\n");
    return getActivityIDofClassLabel(classLabel);
}
From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java
License:LGPL
public static int customJ48Classifier(double[] newInstanceArray) {
    ts1 = new Timestamp(System.currentTimeMillis());
    int activityId = 0;
    String classLabel = "";

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("STANDING");
    classVal.add("SITTING");
    classVal.add("LYINGDOWN");
    classVal.add("WALKING");
    atts.add(new Attribute("class", classVal));
    atts.add(new Attribute("1_1_2_1"));
    atts.add(new Attribute("1_1_3_1"));
    atts.add(new Attribute("1_1_9_2"));
    atts.add(new Attribute("2_1_3_1"));
    atts.add(new Attribute("2_1_4_1"));
    atts.add(new Attribute("2_1_9_2"));

    Instances dataUnlabeled = new Instances("TestInstances", atts, 0);
    dataUnlabeled.add(new DenseInstance(1.0, newInstanceArray));
    dataUnlabeled.setClassIndex(0);
    try {
        activityId = (int) getJ48ActivityId(
                MonitoringWorkerThread.cls.classifyInstance(dataUnlabeled.firstInstance()));
        classLabel = dataUnlabeled.firstInstance().classAttribute().value(activityId);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ts = new Timestamp(System.currentTimeMillis());
    // Log.e("classifyActivity J48", " -> Impiegati: " + (ts.getTime() - ts1.getTime()) + " ms;\n");
    return activityId; // getActivityIDofClassLabel(classLabel);
}
From source file:com.sliit.normalize.NormalizeDataset.java
public String normalizeDataset() {
    System.out.println("start normalizing data");
    String filePathOut = "";
    try {
        CSVLoader loader = new CSVLoader();
        if (reducedDiemensionFile != null) {
            loader.setSource(reducedDiemensionFile);
        } else {
            if (tempFIle != null && tempFIle.exists()) {
                loader.setSource(tempFIle);
            } else {
                loader.setSource(csvFile);
            }
        }
        Instances dataInstance = loader.getDataSet();
        Normalize normalize = new Normalize();
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        normalize.setInputFormat(dataInstance);

        String directory = csvFile.getParent();
        outputFile = new File(directory + "/" + "normalized" + csvFile.getName());
        if (!outputFile.exists()) {
            outputFile.createNewFile();
        }
        CSVSaver saver = new CSVSaver();
        saver.setFile(outputFile);

        // Push every instance (including the first) through the filter,
        // then collect the filtered output in the same order
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            normalize.input(dataInstance.instance(i));
        }
        normalize.batchFinished();
        Instances outPut = new Instances(dataInstance, 0);
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            outPut.add(normalize.output());
        }

        // Binarize the class attribute: "normal." -> "0", everything else -> "1"
        Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1);
        for (int j = 0; j < attribute.numValues(); j++) {
            if (attribute.value(j).equals("normal.")) {
                outPut.renameAttributeValue(attribute, attribute.value(j), "0");
            } else {
                outPut.renameAttributeValue(attribute, attribute.value(j), "1");
            }
        }
        saver.setInstances(outPut);
        saver.writeBatch();
        writeToNewFile(directory);
        filePathOut = directory + "norm" + csvFile.getName();
        if (tempFIle != null) {
            tempFIle.delete();
        }
        if (reducedDiemensionFile != null) {
            reducedDiemensionFile.delete();
        }
        outputFile.delete();
    } catch (Exception e) {
        log.error("Error occurred:" + e.getMessage());
    }
    return filePathOut;
}
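The manual input()/batchFinished()/output() loop above is easy to get wrong. For reference, Weka's Filter class offers a one-call batch API that does the same work; a minimal sketch, assuming the same Normalize filter and an Instances object loaded elsewhere (helper name and variable names are illustrative):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Normalize;

// data: an Instances object loaded elsewhere (e.g. with CSVLoader)
public static Instances normalizeAll(Instances data) throws Exception {
    Normalize normalize = new Normalize();
    normalize.setInputFormat(data);           // initialize the filter from the header
    return Filter.useFilter(data, normalize); // push the whole dataset through in one call
}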
From source file:com.yahoo.labs.samoa.instances.SamoaToWekaInstanceConverter.java
License:Apache License
/**
 * Weka instances.
 *
 * @param instances the instances
 * @return the weka.core.Instances
 */
public weka.core.Instances wekaInstances(Instances instances) {
    weka.core.Instances wekaInstances = wekaInstancesInformation(instances);
    // We assume that we have only one WekaInstanceInformation for SamoaToWekaInstanceConverter
    this.wekaInstanceInformation = wekaInstances;
    for (int i = 0; i < instances.numInstances(); i++) {
        wekaInstances.add(wekaInstance(instances.instance(i)));
    }
    return wekaInstances;
}
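Typical usage is a single conversion call; a minimal sketch, where samoaData is assumed to be an existing com.yahoo.labs.samoa.instances.Instances:

// Convert a SAMOA dataset to a Weka dataset; each row is copied via weka.core.Instances.add
SamoaToWekaInstanceConverter converter = new SamoaToWekaInstanceConverter();
weka.core.Instances wekaData = converter.wekaInstances(samoaData);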
From source file:core.classifier.MyFirstClassifier.java
License:Open Source License
/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (see equation (3a)). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }
        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file:core.ClusterEvaluationEX.java
License:Open Source License
/**
 * Returns the centroids.
 *
 * @param num offset added to each cluster ID
 */
public Instances getCentroids(int num) {
    FastVector atts = new FastVector();
    Attribute clusterID = new Attribute("clusterID");
    atts.addElement(clusterID);
    Instances data = new Instances("centroids", atts, m_numClusters);
    for (int i = 0; i < ID.numAttributes() - 1; i++) {
        Attribute att = new Attribute("Subject" + String.valueOf(i));
        atts.addElement(att);
    }

    // Collect the distinct cluster IDs (offset by num), one map slot per cluster
    double[] map = new double[m_numClusters];
    double[] temp = new double[m_clusterAssignments.length];
    System.arraycopy(m_clusterAssignments, 0, temp, 0, m_clusterAssignments.length);
    int n = map.length;
    for (int i = 0; i < m_clusterAssignments.length; i++) {
        double id = temp[i];
        if (id == -1)
            continue;
        boolean flag = true;
        for (int j = 0; j < temp.length; j++) {
            if (temp[j] == id) {
                temp[j] = -1;
            }
        }
        if (flag && n != -1) {
            map[map.length - n] = id + num;
            n--;
        } else if (n != -1) {
            continue;
        } else {
            break;
        }
    }

    // Average the members of each cluster and add the centroid to the result
    for (int i = 0; i < map.length; i++) {
        double id = map[i];
        double[] averatts = new double[ID.numAttributes()];
        int count = 0;
        for (int j = 0; j < ID.numInstances(); j++) {
            Instance iter = ID.instance(j);
            if (iter.value(0) == id) {
                averatts = CommonMethords.add(averatts, iter.toDoubleArray());
                count++;
            }
        }
        averatts = CommonMethords.calAver(averatts, count);
        Instance ins = new Instance(1, averatts);
        data.add(ins);
    }
    return data;
}
From source file:core.TextDirectoryLoader.java
License:Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined by a
 * call to getStructure then method should do so before processing the rest of
 * the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }
    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<String> classes = new ArrayList<String>();
    Enumeration<Object> enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements()) {
        Object oo = enm.nextElement();
        if (oo instanceof SerializedObject) {
            classes.add(((SerializedObject) oo).getObject().toString());
        } else {
            classes.add(oo.toString());
        }
    }
    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = classes.get(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (String file : files) {
            try {
                fileCount++;
                if (getDebug()) {
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + file);
                }
                double[] newInst = null;
                if (m_OutputFilename) {
                    newInst = new double[3];
                } else {
                    newInst = new double[2];
                }
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + file);
                // Open the file with the configured charset, if any
                BufferedReader is;
                if (m_charSet == null || m_charSet.length() == 0) {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt)));
                } else {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet));
                }
                // Read the whole file line by line through the charset-aware reader
                StringBuffer txtStr = new StringBuffer();
                String line;
                while ((line = is.readLine()) != null) {
                    txtStr.append(line + System.getProperty("line.separator"));
                }
                newInst[0] = data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename) {
                    newInst[1] = data.attribute(1).addStringValue(subdirPath + File.separator + file);
                }
                newInst[data.classIndex()] = k;
                data.add(new DenseInstance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator
                        + subdirPath + File.separator + file);
            }
        }
    }
    return data;
}