List of usage examples for weka.core Instance setValue
public void setValue(Attribute att, String value);
From source file:oxis.yologp.YOLogPDescriptor.java
License:Open Source License
/** * Train a model, erase the other one//from w w w. j a va2 s.c o m * * @param String name of the model to save */ public void train(String name) throws Exception { compute(); Instances instances = buildDataset(); model = new RandomForest(); Map<Object, Object> properties; for (DrugStruct drugStruct : listDrug) { if (drugStruct.drug.getProperty("flag")) { properties = drugStruct.drug.getProperties(); Instance instance = new DenseInstance(instances.numAttributes()); //28 + 1024 instance.setDataset(instances); for (Object propKey : properties.keySet()) { if (!(propKey.equals("hash") || propKey.equals("flag") || propKey.equals("smiles"))) { try { instance.setValue(instances.attribute(propKey.toString()), Double.parseDouble(properties.get(propKey).toString())); } catch (NullPointerException ex) { Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.WARNING, "Property not used: {0}", propKey.toString()); } } } instance.setClassValue(drugStruct.getLogP()); instances.add(instance); } } model.setNumFeatures(200); model.setNumTrees(400); model.setMaxDepth(0); model.buildClassifier(instances); weka.core.SerializationHelper.write(path + name, model); }
From source file:Part2.HierarchicalClusterer.java
License:Open Source License
/**
 * Calculates the distance between two clusters according to the configured
 * link type ({@code m_nLinkType}).
 *
 * @param fDistance pairwise distance matrix indexed by instance index
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for second cluster
 * @return distance between clusters based on link type
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    //double SemiDist = m_DistanceFunction.Semi_distance(fDistance, cluster1, cluster2);
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) {
                    fBestDist = fDist;
                }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE:
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) {
                    fBestDist = fDist;
                }
            }
        }
        // COMPLETE stops here; ADJCOMLPETE falls through to subtract an adjustment below.
        if (m_nLinkType == COMPLETE) {
            break;
        }
        // calculate adjustment, which is the largest within-cluster distance
        // (taken over both cluster1 and cluster2)
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        fBestDist -= fMaxDist;
        break;
    case AVERAGE:
        // finds average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        fBestDist /= (cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of a merged cluster (aka group-average
        // agglomerative clustering): average over all pairs within the union
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        int n = merged.size();
        // n*(n-1)/2 = number of unordered pairs in the merged cluster
        fBestDist /= (n * (n - 1.0) / 2.0);
    }
        break;
    case CENTROID:
        // finds the distance of the centroids of the clusters
        // (computed attribute-wise from the raw instance values)
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for distance function (copies of instance 0,
        // overwritten with the centroid values)
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    case WARD: {
        // finds the distance as the change caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares
        // of the centroids of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
    }
        break;
    }
    //double alpha = 1;
    //return alpha*SemiDist + (1-alpha)*fBestDist;
    return fBestDist;
}
From source file:Part2.HierarchicalClustererEx.java
License:Open Source License
/** calculate the distance between two clusters * @param cluster1 list of indices of instances in the first cluster * @param cluster2 dito for second cluster * @return distance between clusters based on link type */// ww w. ja v a2 s.c o m double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) { double fBestDist = Double.MAX_VALUE; //double SemiDist = m_DistanceFunction.Semi_distance(fDistance, cluster1, cluster2); switch (m_nLinkType) { case SINGLE: // find single link distance aka minimum link, which is the closest distance between // any item in cluster1 and any item in cluster2 fBestDist = Double.MAX_VALUE; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = 0; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); double fDist = fDistance[i1][i2]; if (fBestDist > fDist) { fBestDist = fDist; } } } break; case COMPLETE: case ADJCOMLPETE: // find complete link distance aka maximum link, which is the largest distance between // any item in cluster1 and any item in cluster2 fBestDist = 0; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = 0; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); double fDist = fDistance[i1][i2]; if (fBestDist < fDist) { fBestDist = fDist; } } } if (m_nLinkType == COMPLETE) { break; } // calculate adjustment, which is the largest within cluster distance double fMaxDist = 0; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = i + 1; j < cluster1.size(); j++) { int i2 = cluster1.elementAt(j); double fDist = fDistance[i1][i2]; if (fMaxDist < fDist) { fMaxDist = fDist; } } } for (int i = 0; i < cluster2.size(); i++) { int i1 = cluster2.elementAt(i); for (int j = i + 1; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); double fDist = fDistance[i1][i2]; if (fMaxDist < fDist) { fMaxDist = fDist; } } } fBestDist -= fMaxDist; break; case AVERAGE: // finds average distance 
between the elements of the two clusters fBestDist = 0; for (int i = 0; i < cluster1.size(); i++) { int i1 = cluster1.elementAt(i); for (int j = 0; j < cluster2.size(); j++) { int i2 = cluster2.elementAt(j); fBestDist += fDistance[i1][i2]; } } fBestDist /= (cluster1.size() * cluster2.size()); break; case MEAN: { // calculates the mean distance of a merged cluster (akak Group-average agglomerative clustering) Vector<Integer> merged = new Vector<Integer>(); merged.addAll(cluster1); merged.addAll(cluster2); fBestDist = 0; for (int i = 0; i < merged.size(); i++) { int i1 = merged.elementAt(i); for (int j = i + 1; j < merged.size(); j++) { int i2 = merged.elementAt(j); fBestDist += fDistance[i1][i2]; } } int n = merged.size(); fBestDist /= (n * (n - 1.0) / 2.0); } break; case CENTROID: // finds the distance of the centroids of the clusters double[] fValues1 = new double[m_instances.numAttributes()]; for (int i = 0; i < cluster1.size(); i++) { Instance instance = m_instances.instance(cluster1.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] += instance.value(j); } } double[] fValues2 = new double[m_instances.numAttributes()]; for (int i = 0; i < cluster2.size(); i++) { Instance instance = m_instances.instance(cluster2.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { fValues2[j] += instance.value(j); } } for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] /= cluster1.size(); fValues2[j] /= cluster2.size(); } // set up two instances for distance function Instance instance1 = (Instance) m_instances.instance(0).copy(); Instance instance2 = (Instance) m_instances.instance(0).copy(); for (int j = 0; j < m_instances.numAttributes(); j++) { instance1.setValue(j, fValues1[j]); instance2.setValue(j, fValues2[j]); } fBestDist = m_DistanceFunction.distance(instance1, instance2); break; case WARD: { // finds the distance of the change in caused by merging the cluster. 
// The information of a cluster is calculated as the error sum of squares of the // centroids of the cluster and its members. double ESS1 = calcESS(cluster1); double ESS2 = calcESS(cluster2); Vector<Integer> merged = new Vector<Integer>(); merged.addAll(cluster1); merged.addAll(cluster2); double ESS = calcESS(merged); fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size(); } break; } double alpha = 0.5; //return alpha*SemiDist + (1-alpha)*fBestDist; return fBestDist; }
From source file:pl.nask.hsn2.service.analysis.JSWekaAnalyzer.java
License:Open Source License
public final JSClass classifyString(File file) { String ngrams = NGramsCalc.getNgramsForFile(file.getPath(), ngramsLength, ngramsQuantity); if (ngrams == null) { LOGGER.info("No ngrams extracted, probably JS source is too short"); } else {// ww w. j a va2 s .c o m StringTokenizer st = new StringTokenizer(ngrams, " "); if (st.countTokens() >= ngramsQuantity) { Instance t = new Instance(2); t.setDataset(trainingSet); t.setValue(0, ngrams); try { double dd = fc.classifyInstance(t); return JSClass.valueOf(trainingSet.classAttribute().value((int) dd).toUpperCase()); } catch (Exception e) { LOGGER.error(e.getMessage(), e); } } } return JSClass.UNCLASSIFIED; }
From source file:Prediccion.PrecidePasoNodo.java
License:Open Source License
/**
 * Loads (Intervalo, Total) rows for this node from the Fusion Tables store
 * into a Weka dataset sorted by timestamp.
 *
 * NOTE(review): the {@code hora} parameter is never used; the query instead
 * restricts rows to intervals ending in "00:00:00" — confirm intent.
 *
 * @param hora hour of the day (currently ignored)
 * @return dataset of date/total pairs, sorted by the date attribute
 * @throws ParseException if a timestamp cannot be parsed
 */
Instances cargarDatos(int hora) throws ParseException {
    // Declare the instance attributes: a date attribute and a numeric total
    Attribute a0 = new Attribute("Intervalo", "yyyy-MM-dd HH:mm:ss");
    Attribute a1 = new Attribute("Total");
    ArrayList<Attribute> c = new ArrayList<>();
    c.add(a0);
    c.add(a1);
    // Create the instance set (initial capacity 1000)
    Instances instances = new Instances(nodo, c, 1000);
    // Open the Fusion Tables connection and fetch up to 10000 rows, newest first
    cFT = new conectarFusionTables();
    Sqlresponse r = cFT.select(TABLAID, "Intervalo, Total",
            "idNodo = " + nodo + " and Intervalo ENDS WITH '00:00:00'",
            "ORDER BY \'Intervalo\' DESC LIMIT 10000");
    for (List<Object> a : r.getRows()) {
        Instance i = new DenseInstance(2);
        String s0 = (String) a.get(0);
        String s1 = (String) a.get(1);
        System.err.println(s0 + " ->" + s1); // debug trace of each row
        // Attribute 0 stores the parsed date; attribute 1 the integer total
        i.setValue(instances.attribute(0), instances.attribute(0).parseDate(s0));
        i.setValue(instances.attribute(1), Integer.parseInt(s1));
        instances.add(i);
    }
    // Sort chronologically by the date attribute
    instances.sort(0);
    return instances;
}
From source file:Prediccion.Prediccion.java
License:Open Source License
/**
 * Loads (Intervalo, Total) rows for this node from the Fusion Tables store and
 * buckets them into 24 Weka datasets, one per hour of the day, each sorted by
 * timestamp.
 *
 * @return list of 24 datasets indexed by hour (0-23)
 * @throws ParseException if a timestamp cannot be parsed
 */
ArrayList<Instances> cargarDatos() throws ParseException {
    // Declare the instance attributes: a date attribute and a numeric total
    Attribute a0 = new Attribute("Intervalo", "yyyy-MM-dd HH:mm:ss");
    Attribute a1 = new Attribute("Total");
    ArrayList<Attribute> c = new ArrayList<>();
    c.add(a0);
    c.add(a1);
    // Create one instance set per hour of the day (initial capacity 1000 each)
    ArrayList<Instances> instances = new ArrayList<>(24);
    for (int i = 0; i < 24; i++) {
        instances.add(new Instances(nodo, c, 1000));
    }
    // Open the Fusion Tables connection and fetch up to 10000 rows, newest first
    cFT = new conectarFusionTables();
    Sqlresponse r = cFT.select(TABLAID, "Intervalo, Total",
            "idNodo = " + nodo + " ",
            "ORDER BY \'Intervalo\' DESC LIMIT 10000");
    try {
        System.err.println(r.toPrettyString()); // debug dump of the raw response
    } catch (IOException ex) {
        Logger.getLogger(Prediccion.class.getName()).log(Level.SEVERE, null, ex);
    }
    for (List<Object> a : r.getRows()) {
        Instance i = new DenseInstance(2);
        String s0 = (String) a.get(0);
        String s1 = (String) a.get(1);
        // Hour is taken from the "yyyy-MM-dd HH:mm:ss" timestamp, positions 11-12
        int hora = Integer.parseInt(s0.substring(11, 13));
        System.err.println(s0 + " ->" + s1 + " " + hora); // debug trace of each row
        // Attribute 0 stores the parsed date; attribute 1 the integer total
        i.setValue(instances.get(hora).attribute(0), instances.get(hora).attribute(0).parseDate(s0));
        i.setValue(instances.get(hora).attribute(1), Integer.parseInt(s1));
        instances.get(hora).add(i);
    }
    // Sort each hourly dataset chronologically by the date attribute
    for (Instances a : instances) {
        a.sort(0);
    }
    return instances;
}
From source file:predictors.HelixIndexer.java
License:Open Source License
/** * Analyzes a given window and saves it in the database. * /*from w w w .j a va 2 s. c om*/ * @param pssm * @param windowCenter * @param structure */ private void addWindowToDatabase(Pssm pssm, int windowCenter, char[] structure) { int index = Mappings.ssToInt(structure[windowCenter]); Instance window = this.buildInstance(pssm, windowCenter); if (index == Mappings.indexTmh) { index = HelixIndexer.indexTmh; } else if (index == Mappings.indexSignal) { index = HelixIndexer.indexSignal; } else { index = HelixIndexer.indexNotTmh; } window.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), index); window.setDataset(this.dataset); this.dataset.add(window); }
From source file:predictors.HelixPredictor.java
License:Open Source License
/** * Analyzes a given segment (TMH or not) and saves it in the database. * /*from ww w .ja va2s . c o m*/ * @param pssm * @param start * @param end * @param structureIndex */ private void addSegmentToDatabse(Pssm pssm, int start, int end, int structureIndex) { Instance segment = this.buildInstance(pssm, start, end); segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex); segment.setDataset(this.dataset); this.dataset.add(segment); }
From source file:predictors.TopologyPredictor.java
License:Open Source License
/** * Analyzes a given window and saves it in the database. * /*ww w . j ava 2 s . c om*/ * @param pssm * @param structure * @param structureIndex * @param startPos */ private void addProteinToDatabse(Pssm pssm, char[] structure, int structureIndex, int startPos) { ArrayList<Segment> solSegments = findSegments(structure); Instance segment = this.buildInstance(pssm, structure, solSegments, startPos); segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex); segment.setDataset(this.dataset); this.dataset.add(segment); }
From source file:probcog.bayesnets.core.ClustererDiscretizationFilter.java
License:Open Source License
/**
 * Maps a continuous value to its discretized cluster label.
 *
 * @param continuous the continuous value to discretize
 * @return the label of the cluster the value falls into, or {@code null} if
 *         clustering fails (the exception is printed to stderr)
 */
public String getValueForContinuous(double continuous) {
    Instance query = new Instance(1);
    query.setValue(0, continuous);
    try {
        return outputValues[clusterer.clusterInstance(query)];
    } catch (Exception e) {
        // Preserved behaviour: report the failure and fall back to null.
        e.printStackTrace();
        return null;
    }
}