List of usage examples for weka.core Instances get
@Override
public Instance get(int index)
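The get method returns the Instance stored at the given 0-based position; it hands back a reference into the dataset, not a copy. A minimal standalone sketch (the file name data.arff is a placeholder, not part of any example below):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesGetDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path for any ARFF dataset
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.get(i); // same object as stored, not a copy
            System.out.println(i + ": " + inst);
        }
    }
}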
From source file:nlpmusic.StringClusterer.java
public ArrayList<ArrayList<String>> cluster(ArrayList<String> tem) throws Exception {
    Instances source = listLoad(tem);

    // Convert the string attribute into a word-count vector representation
    StringToWordVector vect = new StringToWordVector();
    vect.setWordsToKeep(to_keep);
    vect.setInputFormat(source);
    Instances datas = Filter.useFilter(source, vect);

    // Cluster the vectorized data with density-based DBSCAN
    DBSCAN clusterer = new DBSCAN();
    clusterer.setEpsilon(threshold);
    clusterer.setMinPoints(min_points);
    clusterer.buildClusterer(datas);

    // Group the original strings by their assigned cluster
    ArrayList<ArrayList<String>> ret = new ArrayList<>();
    for (int i = 0; i < clusterer.numberOfClusters(); i++) {
        ArrayList<String> to_add = new ArrayList<>();
        for (int j = 0; j < datas.size(); j++) {
            try {
                if (clusterer.clusterInstance(datas.get(j)) == i) {
                    to_add.add(source.get(j).toString());
                }
            } catch (Exception e) {
                // noise points cannot be assigned to a cluster; skip them
            }
        }
        ret.add(to_add);
    }
    return ret;
}
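A hypothetical caller for the method above, to show how the returned list groups the input strings by cluster; the no-argument StringClusterer constructor is an assumption, since only the cluster method and the to_keep, threshold, and min_points fields appear in the source:

// Hypothetical usage; the constructor is assumed, not shown in the source.
StringClusterer clusterer = new StringClusterer();
ArrayList<ArrayList<String>> groups =
        clusterer.cluster(new ArrayList<>(Arrays.asList("rock", "rocked", "jazz")));
for (ArrayList<String> group : groups) {
    System.out.println(group); // strings assigned to the same DBSCAN cluster
}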
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License:Open Source License
private Pair<IntervalRule, Double> createRule(Instances flatData, Instances miData, int iterations)
        throws Exception {
    // store for the distances between the reference instance and all others
    double[] distances = new double[flatData.numInstances()];
    // the distance function
    DistanceFunction distFunc = new EuclideanDistance(flatData);
    // permutation which sorts the distances
    Integer[] perm = new Integer[flatData.numInstances()];
    IntervalRule bestRule = null;
    double bestRuleScore = -Double.MAX_VALUE;

    // retrieve the best rule heuristically for a number of iterations
    for (int ruleIterations = 0; ruleIterations < iterations; ruleIterations++) {
        // randomly select a positive bag and take the instance with the
        // largest weight as the reference instance for the next rule
        Random r = new Random();
        int bagIdx;
        while (miData.get(bagIdx = r.nextInt(miData.numInstances())).value(2) == 0);
        Instance refInstance = miData.get(bagIdx).relationalValue(1).firstInstance();
        for (Instance i : miData.get(bagIdx).relationalValue(1)) {
            if (i.weight() > refInstance.weight()) {
                refInstance = i;
            }
        }

        IntervalRule rule = new IntervalRule();
        rule.updateClassifier(refInstance);

        // calculate the distance from the reference instance to all other
        // positive instances (negatives are set to NaN) and sort them
        Arrays.fill(distances, Double.NaN);
        for (int i = 0; i < distances.length; i++) {
            if (flatData.get(i).classValue() == 1) {
                distances[i] = distFunc.distance(refInstance, flatData.get(i));
            }
        }
        PermutationSort.sortPermInPlace(distances, perm);

        double ruleScore = 0;
        double tmpRuleScore = 0;

        // extend the rule successively by the nearest instances until the
        // score no longer increases
        int instanceIdx = 0;
        while (true) {
            if (!Double.isNaN(distances[perm[instanceIdx]])) {
                IntervalRule tmpRule = new IntervalRule(rule);
                tmpRule.updateClassifier(flatData.get(perm[instanceIdx]));
                // evaluate rule
                tmpRuleScore = ruleScore(tmpRule, flatData);
                if (tmpRuleScore >= ruleScore) {
                    ruleScore = tmpRuleScore;
                    rule = tmpRule;
                } else {
                    break;
                }
            }
            instanceIdx++;
        }

        if (ruleScore > bestRuleScore) {
            bestRuleScore = ruleScore;
            bestRule = rule;
        }
    } // iterations per rule

    return new ValuePair<IntervalRule, Double>(bestRule, bestRuleScore);
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License:Open Source License
private Instances toSingleInstanceDataset(Instances miData, Instances flatData) throws Exception {
    MultiInstanceToPropositional convertToProp = new MultiInstanceToPropositional();
    convertToProp.setInputFormat(miData);
    for (int i = 0; i < miData.numInstances(); i++) {
        convertToProp.input(miData.instance(i));
    }
    convertToProp.batchFinished();

    if (flatData == null) {
        flatData = convertToProp.getOutputFormat();
        flatData.deleteAttributeAt(0); // remove the bag index attribute
    }

    Instance processed;
    while ((processed = convertToProp.output()) != null) {
        processed.setDataset(null);
        processed.deleteAttributeAt(0); // remove the bag index attribute
        flatData.add(processed);
    }

    // set weights: propagate the per-instance weights from the bags
    int instanceIdx = 0;
    for (Instance bag : miData) {
        for (Instance instance : bag.relationalValue(1)) {
            flatData.get(instanceIdx).setWeight(instance.weight());
            instanceIdx++;
        }
    }
    return flatData;
}
From source file:org.openml.webapplication.features.ExtractFeatures.java
License:Open Source License
public static List<Feature> getFeatures(Instances dataset, String defaultClass) {
    if (defaultClass != null) {
        dataset.setClass(dataset.attribute(defaultClass));
    } else {
        dataset.setClassIndex(dataset.numAttributes() - 1);
    }
    final ArrayList<Feature> resultFeatures = new ArrayList<Feature>();
    for (int i = 0; i < dataset.numAttributes(); i++) {
        Attribute att = dataset.attribute(i);
        int numValues = dataset.classAttribute().isNominal() ? dataset.classAttribute().numValues() : 0;
        AttributeStatistics attributeStats = new AttributeStatistics(dataset.attribute(i), numValues);
        for (int j = 0; j < dataset.numInstances(); ++j) {
            attributeStats.addValue(dataset.get(j).value(i), dataset.get(j).classValue());
        }

        String data_type = null;
        Integer numberOfDistinctValues = null;
        Integer numberOfUniqueValues = null;
        Integer numberOfMissingValues = null;
        Integer numberOfIntegerValues = null;
        Integer numberOfRealValues = null;
        Integer numberOfNominalValues = null;
        Integer numberOfValues = null;
        Double maximumValue = null;
        Double minimumValue = null;
        Double meanValue = null;
        Double standardDeviation = null;

        AttributeStats as = dataset.attributeStats(i);
        numberOfDistinctValues = as.distinctCount;
        numberOfUniqueValues = as.uniqueCount;
        numberOfMissingValues = as.missingCount;
        numberOfIntegerValues = as.intCount;
        numberOfRealValues = as.realCount;
        if (att.isNominal()) {
            numberOfNominalValues = att.numValues();
        }
        numberOfValues = attributeStats.getTotalObservations();

        if (att.isNumeric()) {
            maximumValue = attributeStats.getMaximum();
            minimumValue = attributeStats.getMinimum();
            meanValue = attributeStats.getMean();
            standardDeviation = 0.0;
            try {
                standardDeviation = attributeStats.getStandardDeviation();
            } catch (Exception e) {
                Conversion.log("WARNING", "StdDev", "Could not compute standard deviation of feature "
                        + att.name() + ": " + e.getMessage());
            }
        }

        // map Weka's attribute type constants (0, 1, 2) to a readable name
        if (att.type() == Attribute.NUMERIC) {
            data_type = "numeric";
        } else if (att.type() == Attribute.NOMINAL) {
            data_type = "nominal";
        } else if (att.type() == Attribute.STRING) {
            data_type = "string";
        } else {
            data_type = "unknown";
        }

        resultFeatures.add(new Feature(att.index(), att.name(), data_type,
                att.index() == dataset.classIndex(), numberOfDistinctValues, numberOfUniqueValues,
                numberOfMissingValues, numberOfIntegerValues, numberOfRealValues, numberOfNominalValues,
                numberOfValues, maximumValue, minimumValue, meanValue, standardDeviation,
                attributeStats.getClassDistribution()));
    }
    return resultFeatures;
}
From source file:org.wkwk.classifier.MyC45.java
public double bestThreshold(Instances data, Attribute attr) {
    data.sort(attr);
    double m_ig = 0;
    double bestThr = 0;
    double classTemp = data.get(0).classValue();
    double valueTemp = data.get(0).value(attr);
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (classTemp != inst.classValue()) {
            classTemp = inst.classValue();
            // candidate threshold: midpoint between the previous attribute
            // value and the value at which the class label changes
            double threshold = valueTemp + ((inst.value(attr) - valueTemp) / 2);
            double igTemp = computeInfoGainCont(data, attr, threshold);
            if (m_ig < igTemp) {
                m_ig = igTemp;
                bestThr = threshold;
            }
        }
        // always remember the previous instance's value so the midpoint lies
        // between adjacent instances (the original only updated this on a
        // class change, which produced stale split points)
        valueTemp = inst.value(attr);
    }
    return bestThr;
}
From source file:sentinets.Prediction.java
License:Open Source License
public void writeStats(Instances tweetInstances) {
    //TweetCorpusStatistics stats = new TweetCorpusStatistics();
    System.out.println("Stats Instances: \n" + tweetInstances.toSummaryString());
    for (int i = 0; i < tweetInstances.size(); i++) {
        // column numbers below are 1-based; subtract 1 for Weka's 0-based attribute indices
        String user = tweetInstances.get(i).stringValue(11 - 1);
        String mentions = tweetInstances.get(i).stringValue(3 - 1);
        String hashtags = tweetInstances.get(i).stringValue(14 - 1);
        String epClass = tweetInstances.get(i).stringValue(15 - 1);
        String snsClass = tweetInstances.get(i).stringValue(16 - 1);
        System.out.println("Tweet Details:\t" + user + "\t" + mentions + "\t" + hashtags + "\t"
                + printDist(classDist.get(i)));
        //stats.updateStatistics(user, mentions, hashtags, epClass+","+snsClass, classDist.get(i));
    }
}
From source file:soccer.core.models.BookKeeperConsistency.java
public void showTheInstance(int index) throws IOException {
    Instances instances = loader.getDataSet();
    Instance i = instances.get(index);
    System.out.println(i.toString());
}
From source file:svmal.SVMStrategy.java
public static Instances InstancesToInstances2(Instances insts) {
    Instances result = new Instances(insts, 0, 0);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance orig = insts.get(i);
        Instance2 inst2 = new Instance2(orig.weight(), orig.toDoubleArray());
        inst2.setDataset(result);
        result.add(inst2);
    }
    return result;
}
From source file:swm.project.mappings.UserToUserCluster.java
private void clusterUserHistoryWithKmeans() throws FileNotFoundException, IOException, Exception {
    userToUserClusterHistory = new HashMap<>();
    userClustersToUsersHistory = new HashMap<>();
    Reader reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS);
    Instances instanceValues = new Instances(reader);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setNumClusters(20);
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setDistanceFunction(new EuclideanDistance());
    kmeans.buildClusterer(instanceValues);

    // getAssignments() requires setPreserveInstancesOrder(true) above
    int[] assignments = kmeans.getAssignments();
    int userid = 0;
    for (int clusterNo : assignments) {
        // attribute 0 holds the user id in this dataset
        int user = (int) instanceValues.get(userid).value(0);
        userToUserClusterHistory.put(user, clusterNo);
        ArrayList<Integer> users = new ArrayList<>();
        if (userClustersToUsersHistory.containsKey(clusterNo)) {
            users = userClustersToUsersHistory.get(clusterNo);
            users.add(user);
        } else {
            users.add(user);
            userClustersToUsersHistory.put(clusterNo, users);
        }
        userid++;
    }
}
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if an error occurred during the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {
    if (m_Train.numInstances() == 0) {
        // no training instances yet: fall back to the default model
        return m_defaultModel.distributionForInstance(instance);
    }
    if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) {
        m_kNNValid = false;
        boolean deletedInstance = false;
        while (m_Train.numInstances() > m_WindowSize) {
            m_Train.delete(0);
            deletedInstance = true;
        }
        // rebuild datastructure; KDTree currently can't delete
        // (the original never set deletedInstance, so this rebuild was dead code)
        if (deletedInstance == true)
            m_NNSearch.setInstances(m_Train);
    }

    // Select k by cross validation
    if (!m_kNNValid && (m_CrossValidate) && (m_kNNUpper >= 1)) {
        crossValidate();
    }

    m_NNSearch.addInstanceInfo(instance);
    Instances neighbours = m_NNSearch.kNearestNeighbours(instance, m_kNN);
    double[] distances = m_NNSearch.getDistances();

    System.out.print("distances weka ");
    for (int i = 0; i < distances.length; ++i)
        System.out.print(" " + distances[i]);
    System.out.println();
    System.out.println("Neighbours");
    for (int i = 0; i < neighbours.size(); ++i)
        System.out.println(neighbours.get(i));

    double[] distribution = makeDistribution(neighbours, distances);
    return distribution;
}