List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
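Before the project-specific examples, here is a minimal, self-contained sketch of the constructor itself using the Weka 3.7+ ArrayList-based API. The class name, relation name, and attribute names are illustrative only and do not come from any of the source files below.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesConstructorSketch {
    public static void main(String[] args) {
        // Define the attribute schema: one numeric attribute and one nominal class.
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        attInfo.add(new Attribute("length"));
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("yes");
        classValues.add("no");
        attInfo.add(new Attribute("class", classValues));

        // Create an empty dataset named "demo" with an initial capacity of 10 rows.
        Instances data = new Instances("demo", attInfo, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // Add one labelled row.
        DenseInstance row = new DenseInstance(2);
        row.setDataset(data);
        row.setValue(0, 4.2);
        row.setValue(1, "yes");
        data.add(row);

        // Prints the dataset in ARFF form.
        System.out.println(data);
    }
}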
From source file:com.reactivetechnologies.analytics.core.eval.AdaBoostM1WithBuiltClassifiers.java
License:Open Source License
@Override
protected void buildClassifierUsingResampling(Instances data) throws Exception {
    Instances trainData, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            /** Changed here: DO NOT build classifier */
            // Build and evaluate classifier
            // m_Classifiers[m_NumIterationsPerformed].buildClassifier(sample);
            /** End change */
            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < 10));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:com.reactivetechnologies.analytics.mapper.TEXTDataMapper.java
License:Open Source License
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
    if (request != null && request.getData() != null && request.getData().length > 0) {
        FastVector fvWekaAttributes = new FastVector(2);
        FastVector nil = null;
        Attribute attr0 = new Attribute("text", nil, 0);

        FastVector fv = new FastVector();
        for (String nominal : request.getClassVars()) {
            fv.addElement(nominal);
        }
        Attribute attr1 = new Attribute("class", fv, 1);

        fvWekaAttributes.addElement(attr0);
        fvWekaAttributes.addElement(attr1);

        Instances ins = new Instances("attr-reln", fvWekaAttributes, request.getData().length);
        ins.setClassIndex(1);

        for (Text s : request.getData()) {
            Instance i = new Instance(2);
            i.setValue(attr0, s.getText());
            i.setValue(attr1, s.getTclass());
            ins.add(i);
        }
        return new Dataset(ins);
    }
    return null;
}
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/**
 * FAST HACK REMOVING FUNCTIONALITIES FROM WEKA ORIGINAL METHOD!
 *
 * @param rs
 * @return
 * @throws SQLException
 */
public static Instances retrieveInstanceFromResultSet(ResultSet rs) throws SQLException {
    ResultSetMetaData md = rs.getMetaData();

    // Determine structure of the instances
    int numAttributes = md.getColumnCount();
    int[] attributeTypes = new int[numAttributes];
    Hashtable[] nominalIndexes = new Hashtable[numAttributes];
    FastVector[] nominalStrings = new FastVector[numAttributes];
    for (int i = 1; i <= numAttributes; i++) {
        attributeTypes[i - 1] = Attribute.NUMERIC;
    }

    // For sqlite: cache column names, because the last while (rs.next())
    // iteration for the tuples below will close the md object
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
        columnNames.add(md.getColumnName(i + 1));
    }

    // Step through the tuples
    FastVector instances = new FastVector();
    int rowCount = 0;
    while (rs.next()) {
        double[] vals = new double[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
            int in = rs.getInt(i);
            if (rs.wasNull()) {
                vals[i - 1] = Instance.missingValue();
            } else {
                vals[i - 1] = in;
            }
        }
        // Build one instance per tuple, once all column values have been read
        Instance newInst = new Instance(1.0, vals);
        instances.addElement(newInst);
        rowCount++;
    }
    // disconnectFromDatabase(); (perhaps other queries might be made)

    // Create the header and add the instances to the dataset
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < numAttributes; i++) {
        /* Fix for databases that uppercase column names */
        // String attribName = attributeCaseFix(md.getColumnName(i + 1));
        String attribName = columnNames.get(i);
        switch (attributeTypes[i]) {
        case Attribute.NOMINAL:
            attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            Attribute att = new Attribute(attribName, (FastVector) null);
            attribInfo.addElement(att);
            for (int n = 0; n < nominalStrings[i].size(); n++) {
                att.addStringValue((String) nominalStrings[i].elementAt(n));
            }
            break;
        case Attribute.DATE:
            attribInfo.addElement(new Attribute(attribName, (String) null));
            break;
        default:
            throw new SQLException("Unknown attribute type");
        }
    }

    Instances result = new Instances("QueryResult", attribInfo, instances.size());
    for (int i = 0; i < instances.size(); i++) {
        result.add((Instance) instances.elementAt(i));
    }
    rs.close();
    return result;
}
From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java
License:LGPL
public static int customKnnClassifier(double[] newInstanceArray) {
    // TODO Auto-generated method stub
    ts1 = new Timestamp(System.currentTimeMillis());
    int activityId = 0;
    String classLabel = "";

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("STANDING");
    classVal.add("SITTING");
    classVal.add("LYINGDOWN");
    classVal.add("WALKING");

    atts.add(new Attribute("class", classVal));
    atts.add(new Attribute("1_1_2_1"));
    atts.add(new Attribute("1_1_3_1"));
    atts.add(new Attribute("1_1_9_2"));
    atts.add(new Attribute("2_1_3_1"));
    atts.add(new Attribute("2_1_4_1"));
    atts.add(new Attribute("2_1_9_2"));

    Instances dataUnlabeled = new Instances("TestInstances", atts, 0);
    dataUnlabeled.add(new DenseInstance(1.0, newInstanceArray));
    dataUnlabeled.setClassIndex(0);

    try {
        activityId = (int) (MonitoringWorkerThread.cls.classifyInstance(dataUnlabeled.firstInstance()));
        classLabel = dataUnlabeled.firstInstance().classAttribute().value(activityId);
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    ts = new Timestamp(System.currentTimeMillis());
    // Log.e("classifyActivity Knn"," -> Impiegati: "+(ts.getTime()-ts1.getTime())+" ms;\n");
    return getActivityIDofClassLabel(classLabel);
}
From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java
License:LGPL
public static int customJRipClassifier(double[] newInstanceArray) {
    // TODO Auto-generated method stub
    ts1 = new Timestamp(System.currentTimeMillis());
    int activityId = 0;
    String classLabel = "";

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("STANDING");
    classVal.add("WALKING");
    classVal.add("SITTING");
    classVal.add("LYINGDOWN");

    atts.add(new Attribute("class", classVal));
    atts.add(new Attribute("1_1_2_1"));
    atts.add(new Attribute("1_1_3_1"));
    atts.add(new Attribute("1_1_9_2"));
    atts.add(new Attribute("2_1_3_1"));
    atts.add(new Attribute("2_1_4_1"));
    atts.add(new Attribute("2_1_9_2"));

    Instances dataUnlabeled = new Instances("TestInstances", atts, 0);
    dataUnlabeled.add(new DenseInstance(1.0, newInstanceArray));
    dataUnlabeled.setClassIndex(0);

    try {
        activityId = (int) MonitoringWorkerThread.cls.classifyInstance(dataUnlabeled.firstInstance());
        Log.i("classifyActivity JRip ---->", activityId + "");
        classLabel = dataUnlabeled.firstInstance().classAttribute().value((int) activityId);
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    ts = new Timestamp(System.currentTimeMillis());
    // Log.i("classifyActivity JRip"," -> Impiegati: "+(ts.getTime()-ts1.getTime())+" ms;\n");
    return getActivityIDofClassLabel(classLabel);
}
From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java
License:LGPL
public static int customJ48Classifier(double[] newInstanceArray) {
    // TODO Auto-generated method stub
    ts1 = new Timestamp(System.currentTimeMillis());
    int activityId = 0;
    String classLabel = "";

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("STANDING");
    classVal.add("SITTING");
    classVal.add("LYINGDOWN");
    classVal.add("WALKING");

    atts.add(new Attribute("class", classVal));
    atts.add(new Attribute("1_1_2_1"));
    atts.add(new Attribute("1_1_3_1"));
    atts.add(new Attribute("1_1_9_2"));
    atts.add(new Attribute("2_1_3_1"));
    atts.add(new Attribute("2_1_4_1"));
    atts.add(new Attribute("2_1_9_2"));

    Instances dataUnlabeled = new Instances("TestInstances", atts, 0);
    dataUnlabeled.add(new DenseInstance(1.0, newInstanceArray));
    dataUnlabeled.setClassIndex(0);

    try {
        activityId = (int) getJ48ActivityId(
                MonitoringWorkerThread.cls.classifyInstance(dataUnlabeled.firstInstance()));
        classLabel = dataUnlabeled.firstInstance().classAttribute().value((int) activityId);
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    ts = new Timestamp(System.currentTimeMillis());
    // Log.e("classifyActivity J48"," -> Impiegati: "+(ts.getTime()-ts1.getTime())+" ms;\n");
    return activityId; // getActivityIDofClassLabel(classLabel);
}
From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java
License:Apache License
/**
 * Internal function which initializes the {@link Instances} used by the
 * {@link Classifier} wrapped by the {@link AnthOnlineClassifier} class.
 */
private void initInstances() {
    // gather attributes
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    ArrayList<String> allowedClasses = new ArrayList<String>();
    allowedClasses.add("sem");
    allowedClasses.add("nonsem");
    Attribute classAttribute = new Attribute("class", allowedClasses);
    attributes.add(classAttribute);
    // this looks somehow stupid to me :/
    List<String> vector = null;
    attributes.add(new Attribute("domain", vector));
    attributes.add(new Attribute("sempar"));
    attributes.add(new Attribute("nonsempar"));
    attributes.add(new Attribute("semsib"));
    attributes.add(new Attribute("nonsemsib"));
    for (int i = 0; i < hashTrickSize; i++) {
        // the boolAttValues here should not be necessary, but based on some
        // runtime experiments they make a (slight) difference, as it is not
        // possible to create boolean attributes directly. The time to
        // define a split is reduced by doing this with nominal attributes.
        attributes.add(new Attribute(getAttributeNameOfHash(i), boolAttValues));
    }

    // now we create the Instances
    instances = new Instances("Anthelion", attributes, 1);
    instances.setClass(classAttribute);

    attributesIndex = new HashMap<String, Integer>();
    for (int i = 0; i < attributes.size(); i++) {
        attributesIndex.put(attributes.get(i).name(), i);
    }

    // set dimension (class + domain + 4 x graph + hashes)
    dimension = 1 + 1 + 4 + hashTrickSize;

    // init replacement array
    replaceMissingValues = new double[dimension];
    for (int i = 0; i < dimension; i++) {
        replaceMissingValues[i] = 0.0;
    }
}
From source file:core.classification.Classifiers.java
License:Open Source License
/**
 * Private constructor for the <code>Classifiers</code> object
 * @param train
 */
private Classifiers(boolean train) {
    SCA = new BayesNet();
    SCB = new MultilayerPerceptron();
    SCC1 = new MultilayerPerceptron();
    SCC2 = new MultilayerPerceptron();
    SCC3 = new MultilayerPerceptron();
    RC = new CostSensitiveClassifier();
    YNC = new J48();

    if (train) {
        try {
            this.trainSC();
        } catch (Exception e) {
            System.out.println("The system encountered the following error while training SC:");
            e.printStackTrace();
        }
        try {
            this.trainRC();
        } catch (Exception e) {
            System.out.println("The system encountered the following error while training RC:");
            e.printStackTrace();
        }
        try {
            this.trainYNC();
        } catch (Exception e) {
            System.out.println("The system encountered the following error while training YNC:");
            e.printStackTrace();
        }
    } else {
        try {
            readSC("SCA.model", "SCB.model", "SCC1.model", "SCC2.model", "SCC3.model");
            readRC("RC.model");
            readYNC("YNC.model");
        } catch (Exception e) {
            System.out.println("Error while reading the classifiers: ");
            e.printStackTrace();
        }
    }

    // Structure creation
    FastVector labels = new FastVector();
    labels.addElement("0");
    labels.addElement("1");
    labels.addElement("2");
    labels.addElement("3");
    labels.addElement("4");

    FastVector clabels = new FastVector();
    clabels.addElement("1");
    clabels.addElement("2");
    clabels.addElement("3");
    clabels.addElement("4");

    FastVector clabels2 = new FastVector();
    clabels2.addElement("0");
    clabels2.addElement("1");
    clabels2.addElement("2");
    clabels2.addElement("3");
    clabels2.addElement("4");

    FastVector clabels3 = new FastVector();
    clabels3.addElement("Y");
    clabels3.addElement("N");

    // Creating the structure for SC
    FastVector attrs = new FastVector();
    attrs.addElement(new Attribute("RATIO"));
    attrs.addElement(new Attribute("CLASS", clabels));
    dataStructSCA = new Instances("SCA-STRUCT", attrs, 0);
    dataStructSCA.setClassIndex(1);

    FastVector attrsB = new FastVector();
    attrsB.addElement(new Attribute("H2"));
    attrsB.addElement(new Attribute("D2"));
    attrsB.addElement(new Attribute("DX"));
    attrsB.addElement(new Attribute("PCLASS", clabels));
    attrsB.addElement(new Attribute("CLASS", clabels));
    dataStructSCB = new Instances("SCB-STRUCT", attrsB, 0);
    dataStructSCB.setClassIndex(4);

    FastVector attrsC1 = new FastVector();
    FastVector attrsC2 = new FastVector();
    FastVector attrsC3 = new FastVector();
    attrsC1.addElement(new Attribute("LH"));
    attrsC1.addElement(new Attribute("LD"));
    attrsC1.addElement(new Attribute("LDX"));
    attrsC1.addElement(new Attribute("LCLASS", clabels));
    attrsC1.addElement(new Attribute("CLASS", clabels));
    attrsC2.addElement(new Attribute("EH"));
    attrsC2.addElement(new Attribute("ED"));
    attrsC2.addElement(new Attribute("EDX"));
    attrsC2.addElement(new Attribute("ECLASS", clabels));
    attrsC2.addElement(new Attribute("CLASS", clabels));
    attrsC3.addElement(new Attribute("SH"));
    attrsC3.addElement(new Attribute("SD"));
    attrsC3.addElement(new Attribute("SDX"));
    attrsC3.addElement(new Attribute("SCLASS", clabels));
    attrsC3.addElement(new Attribute("CLASS", clabels));
    dataStructSCC1 = new Instances("SCC1-STRUCT", attrsC1, 0);
    dataStructSCC1.setClassIndex(4);
    dataStructSCC2 = new Instances("SCC2-STRUCT", attrsC2, 0);
    dataStructSCC2.setClassIndex(4);
    dataStructSCC3 = new Instances("SCC3-STRUCT", attrsC3, 0);
    dataStructSCC3.setClassIndex(4);

    // Creating the structure for RC
    FastVector attrs2 = new FastVector();
    attrs2.addElement(new Attribute("H2"));
    attrs2.addElement(new Attribute("D2"));
    attrs2.addElement(new Attribute("DX"));
    attrs2.addElement(new Attribute("CLASS", clabels));
    attrs2.addElement(new Attribute("PCLASS", clabels));
    attrs2.addElement(new Attribute("RELID", clabels2));
    dataStructRC = new Instances("RC-STRUCT", attrs2, 0);
    dataStructRC.setClassIndex(5);

    // Creating the structure for YNC
    FastVector attrs3 = new FastVector();
    attrs3.addElement(new Attribute("PCLASS", clabels));
    attrs3.addElement(new Attribute("CCLASS", clabels));
    attrs3.addElement(new Attribute("RAREA"));
    attrs3.addElement(new Attribute("H"));
    attrs3.addElement(new Attribute("D"));
    attrs3.addElement(new Attribute("V"));
    attrs3.addElement(new Attribute("YN", clabels3));
    dataStructYC = new Instances("YC-STRUCT", attrs3, 0);
    dataStructYC.setClassIndex(6);
}
From source file:core.ClusterEvaluationEX.java
License:Open Source License
/**
 * Returns the Centroids
 *
 * @param num
 */
public Instances getCentroids(int num) {
    FastVector atts = new FastVector();
    Attribute clusterID = new Attribute("clusterID");
    atts.addElement(clusterID);
    Instances data = new Instances("centroids", atts, m_numClusters);
    for (int i = 0; i < ID.numAttributes() - 1; i++) {
        Attribute att = new Attribute("Subject" + String.valueOf(i));
        atts.addElement(att);
    }

    double[] map = new double[m_numClusters];
    double[] temp = new double[m_clusterAssignments.length];
    System.arraycopy(m_clusterAssignments, 0, temp, 0, m_clusterAssignments.length);
    int n = map.length;
    for (int i = 0; i < m_clusterAssignments.length; i++) {
        double id = temp[i];
        if (id == -1)
            continue;
        boolean flag = true;
        for (int j = 0; j < temp.length; j++) {
            if (temp[j] == id) {
                temp[j] = -1;
            }
        }
        if (flag && n != -1) {
            map[map.length - n] = id + num;
            n--;
        } else if (n != -1) {
            continue;
        } else {
            break;
        }
    }

    for (int i = 0; i < map.length; i++) {
        double id = map[i];
        double[] averatts = new double[ID.numAttributes()];
        int count = 0;
        for (int j = 0; j < ID.numInstances(); j++) {
            Instance iter = ID.instance(j);
            if (iter.value(0) == id) {
                averatts = CommonMethords.add(averatts, iter.toDoubleArray());
                count++;
            }
        }
        averatts = CommonMethords.calAver(averatts, count);
        Instance ins = new Instance(1, averatts);
        data.add(ins);
    }
    return data;
}
From source file:core.DBScan.java
License:Open Source License
/**
 * Generate Clustering via DBScan
 * @param instances The instances that need to be clustered
 * @throws java.lang.Exception If clustering was not successful
 */
public void buildClusterer(Instances instances) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(instances);

    long time_1 = System.currentTimeMillis();

    processed_InstanceID = 0;
    numberOfGeneratedClusters = 0;
    clusterID = 0;

    replaceMissingValues_Filter = new ReplaceMissingValues();
    replaceMissingValues_Filter.setInputFormat(instances);
    Instances filteredInstances = Filter.useFilter(instances, replaceMissingValues_Filter);

    database = databaseForName(getDatabase_Type(), filteredInstances);
    for (int i = 0; i < database.getInstances().numInstances(); i++) {
        DataObject dataObject = dataObjectForName(getDatabase_distanceType(),
                database.getInstances().instance(i), Integer.toString(i), database);
        database.insert(dataObject);
    }
    database.setMinMaxValues();

    Iterator iterator = database.dataObjectIterator();
    while (iterator.hasNext()) {
        DataObject dataObject = (DataObject) iterator.next();
        if (dataObject.getClusterLabel() == DataObject.UNCLASSIFIED) {
            if (expandCluster(dataObject)) {
                clusterID++;
                numberOfGeneratedClusters++;
            }
        }
    }

    long time_2 = System.currentTimeMillis();
    elapsedTime = (double) (time_2 - time_1) / 1000.0;

    FastVector atts = new FastVector();
    Attribute att = new Attribute("clusterID");
    atts.addElement(att);
    double[][] arr = new CommonMethords().InstanceToArrays(instances);
    for (int i = 0; i < arr[0].length; i++) {
        Attribute att2 = new Attribute("Subject" + String.valueOf(i));
        atts.addElement(att2);
    }
    ID = new Instances("doc-subject", atts, arr.length);
    for (int i = 0; i < arr.length; i++) {
        // one value per attribute: the cluster label followed by this row's features
        double[] attsarr = new double[arr[0].length + 1];
        DataObject t = database.getDataObject(Integer.toString(i));
        if (t.getClusterLabel() == DataObject.NOISE)
            attsarr[0] = -1;
        else
            attsarr[0] = t.getClusterLabel();
        for (int j = 0; j < arr[0].length; j++) {
            attsarr[j + 1] = arr[i][j];
        }
        Instance ins = new Instance(1, attsarr);
        ID.add(ins);
    }
}