List of usage examples for weka.core.Instance.attribute(int)
public Attribute attribute(int index);
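A note on the call itself: attribute(int) returns the Attribute object (name, type, nominal labels) behind a column index, while value(int) returns the raw double stored in the cell; the instance must have a dataset header attached or the call throws UnassignedDatasetException. The sketch below is illustrative only, not taken from any source file on this page, and uses the Weka 3.7+ API (DenseInstance, ArrayList<Attribute>); several of the older examples below use FastVector and the concrete Instance class instead.

    import java.util.ArrayList;
    import java.util.Arrays;
    import weka.core.Attribute;
    import weka.core.DenseInstance;
    import weka.core.Instance;
    import weka.core.Instances;

    public class AttributeDemo {
        public static void main(String[] args) {
            // Build a two-column dataset: one numeric attribute, one nominal class.
            ArrayList<String> labels = new ArrayList<String>(Arrays.asList("no", "yes"));
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            atts.add(new Attribute("age"));              // numeric
            atts.add(new Attribute("approved", labels)); // nominal
            Instances data = new Instances("demo", atts, 1);
            data.setClassIndex(1);

            Instance inst = new DenseInstance(2);
            inst.setDataset(data);      // attribute(int) needs a dataset header
            inst.setValue(0, 42.0);
            inst.setValue(1, "yes");

            for (int i = 0; i < inst.numAttributes(); i++) {
                Attribute att = inst.attribute(i);       // the call this page documents
                System.out.println(att.name() + " nominal=" + att.isNominal()
                        + " raw=" + inst.value(i));
            }
        }
    }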
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
    /**
     * Recursively generates a tree.
     *
     * @param data
     *            the data to work with
     * @param classProbs
     *            the class distribution
     * @param header
     *            the header of the data
     * @param minNum
     *            the minimum number of instances per leaf
     * @param debug
     *            whether debugging is on
     * @param attIndicesWindow
     *            the attribute window to choose attributes from
     * @param random
     *            random number generator for choosing random attributes
     * @param depth
     *            the current depth
     * @param determineStructure
     *            whether to determine structure
     * @param m_distributionData
     *            HashMap to put distribution data if getSplitData is true in
     *            any node
     * @throws Exception
     *             if generation fails
     */
    protected void buildTree(Instances data, double[] classProbs, Instances header, boolean debug, int depth,
            JsonNode node, int parent_index, HashMap m_distributionData, Instances requiredInstances,
            LinkedHashMap<String, Classifier> custom_classifiers, List<CustomSet> cSList,
            CustomClassifierService ccService, Dataset ds) throws Exception {
        if (mapper == null) {
            mapper = new ObjectMapper();
        }
        // Store structure of dataset, set minimum number of instances
        m_Info = header;
        m_Debug = debug;
        // if in dead json return
        if (node == null) {
            m_Attribute = -1;
            m_ClassDistribution = null;
            m_Prop = null;
            return;
        }
        // Make leaf if there are no training instances
        if (data.numInstances() == 0) {
            m_Attribute = -1;
            m_ClassDistribution = null;
            m_Prop = null;
            return;
        }
        // Check if node doesn't contain enough instances, is pure,
        // or maximum depth reached
        m_ClassDistribution = classProbs.clone();
        cSetList = cSList;
        ccSer = ccService;
        d = ds;
        // if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum
        //         || Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
        //                 Utils.sum(m_ClassDistribution))
        //         || ((getMaxDepth() > 0) && (depth >= getMaxDepth()))) {
        //     // Make leaf
        //     m_Attribute = -1;
        //     m_Prop = null;
        //     return;
        // }

        // Investigate the selected attribute
        int attIndex = parent_index;
        // options child added by web client developer
        // TODO work with him to make a more meaningful structure...
        JsonNode options = node.get("options");
        if (options == null) {
            return;
        }
        String kind = options.get("kind").asText();
        JsonNode att_name = options.get("attribute_name");
        Boolean getSplitData = false;
        Boolean getInstanceData = false;
        // this allows me to modify the json tree structure to add data about
        // the evaluation
        ObjectNode evalresults = (ObjectNode) options;
        ObjectNode _node = (ObjectNode) node;
        // For Roc - Node Match
        _node.set("roc_uid_0", null);
        _node.set("roc_uid_1", null);
        Map<String, JsonNode> sons = new HashMap<String, JsonNode>();
        // String name = node_name.asText();
        if (kind != null && kind.equals("split_node") && att_name != null) {
            // attIndex = data.attribute(node_id.asText()).index();
            if (!att_name.asText().equals("") && !att_name.asText().contains("custom_classifier")
                    && !att_name.asText().contains("custom_tree")
                    && !att_name.asText().contains("custom_set")) {
                attIndex = data.attribute(att_name.asText()).index();
            } else {
                if (att_name.asText().contains("custom_set")) {
                    int ctr = 0;
                    for (CustomSet c : cSList) {
                        if (c.getId() == Long.valueOf(att_name.asText().replace("custom_set_", ""))) {
                            break;
                        }
                        ctr++;
                    }
                    attIndex = (data.numAttributes() - 1) + custom_classifiers.size() + ctr;
                } else {
                    if (att_name.asText().contains("custom_classifier_new")) {
                        HashMap mp = ccSer.buildCustomClasifier(data,
                                Long.valueOf(att_name.asText().replace("custom_classifier_new_", "")));
                        Classifier fc = (Classifier) mp.get("classifier");
                        custom_classifiers.put("custom_classifier_" + mp.get("id"), fc);
                        evalresults.put("unique_id", "custom_classifier_" + mp.get("id"));
                        evalresults.put("attribute_name", "custom_classifier_" + mp.get("id"));
                        att_name = evalresults.get("attribute_name");
                    }
                    int ctr = 0;
                    for (String key : custom_classifiers.keySet()) {
                        if (key.equals(att_name.asText())) {
                            break;
                        }
                        ctr++;
                    }
                    attIndex = (data.numAttributes() - 1) + ctr;
                }
            }
            if (node.get("getSplitData") != null) {
                getSplitData = node.get("getSplitData").asBoolean();
            }
            JsonNode split_values = node.get("children");
            int c = 0;
            if (split_values != null && split_values.size() > 0) {
                for (JsonNode svalue : split_values) {
                    String key = svalue.get("name").asText();
                    JsonNode son = svalue.get("children").get(0);
                    if (key.contains("<")) {
                        key = "low";
                    } else if (key.contains(">")) {
                        key = "high";
                    }
                    sons.put(key, son);
                    c++;
                }
            }
            // LOGGER.debug("Id name "+att_name+" index "+attIndex+" type "+kind+" sons "+c);
        } else {
            // LOGGER.debug("non split node, name "+att_name+" type "+kind);
        }
        double[] vals = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
        double[][][] dists = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0][0];
        double[][] props = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0];
        double[] splits = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
        listOfFc = custom_classifiers;
        // Compute class distributions and value of splitting
        // criterion for each attribute
        HashMap<String, Double> mp = new HashMap<String, Double>();
        if (attIndex >= data.numAttributes() && attIndex < data.numAttributes() + custom_classifiers.size()) {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        } else if (attIndex >= data.numAttributes() + custom_classifiers.size() - 1) {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        } else {
            if (options.get("split_point") != null) {
                mp = distribution(props, dists, attIndex, data, options.get("split_point").asDouble(),
                        custom_classifiers);
            } else {
                mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
            }
        }
        splits[attIndex] = mp.get("split_point");
        vals[attIndex] = gain(dists[attIndex], priorVal(dists[attIndex]));
        m_Attribute = attIndex;
        double[][] distribution = dists[m_Attribute];
        // stop if input json tree does not contain any more children
        // replacing Utils.gr(vals[m_Attribute], 0) &&
        if (kind != null && kind.equals("split_node") && att_name != null) {
            // Assign classes for custom sets (visual splits).
            m_ClassAssignment.put("Inside", Utils.maxIndex(dists[m_Attribute][1]));
            m_ClassAssignment.put("Outside", (Utils.maxIndex(dists[m_Attribute][1]) == 1) ? 0 : 1);
            // Build subtrees
            m_SplitPoint = splits[m_Attribute];
            m_Prop = props[m_Attribute];
            Instances[] subsets = splitData(data);
            m_Successors = new ManualTree[distribution.length];
            // record quantity and quality measures for node
            int quantity = 0;
            for (int i = 0; i < distribution.length; i++) {
                quantity += subsets[i].numInstances();
            }
            evalresults.put("bin_size", quantity);
            evalresults.put("infogain", vals[m_Attribute]);
            evalresults.put("majClass", m_Info.classAttribute().value(Utils.maxIndex(m_ClassDistribution)));
            evalresults.put("split_point", m_SplitPoint);
            evalresults.put("orig_split_point", mp.get("orig_split_point"));
            if (Boolean.TRUE.equals(getSplitData)) {
                addDistributionData(data, m_Attribute, m_distributionData);
            }
            int maxIndex = 0;
            double maxCount = 0;
            double errors = 0;
            double[] classDist = new double[2];
            double pct_correct = 0;
            double bin_size = 0;
            for (int i = 0; i < distribution.length; i++) {
                m_Successors[i] = new ManualTree();
                m_Successors[i].setKValue(m_KValue);
                m_Successors[i].setMaxDepth(getMaxDepth());
                // To compute class distribution for split node.
                for (int j = 0; j < distribution[i].length; j++) {
                    classDist[j] += distribution[i][j];
                }
                // test an instance to see which child node to send its subset
                // down. after split, should hold for all in set
                String child_name = "";
                Instances subset = subsets[i];
                if (subset == null || subset.numInstances() == 0) {
                    continue;
                }
                Instance inst = subset.instance(0);
                if (m_Attribute >= data.numAttributes()
                        && m_Attribute < data.numAttributes() + custom_classifiers.size()) {
                    double predictedClass = custom_classifiers.get(att_name.asText()).classifyInstance(inst);
                    child_name = m_Info.classAttribute().value((int) predictedClass);
                } else if (m_Attribute >= data.numAttributes() + custom_classifiers.size() - 1) {
                    CustomSet cSet = getReqCustomSet(
                            m_Attribute - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
                    JsonNode vertices = mapper.readTree(cSet.getConstraints());
                    ArrayList<double[]> attrVertices = generateVerticesList(vertices);
                    List<Attribute> aList = generateAttributeList(cSet, data, ds);
                    double[] testPoint = new double[2];
                    testPoint[0] = inst.value(aList.get(0));
                    testPoint[1] = inst.value(aList.get(1));
                    int check = checkPointInPolygon(attrVertices, testPoint);
                    if (check == 0) {
                        child_name = "Outside";
                    } else {
                        child_name = "Inside";
                    }
                } else {
                    // which nominal attribute is this split linked to?
                    if (subset.attribute(m_Attribute).isNominal()) {
                        child_name = inst.attribute(m_Attribute).value((int) inst.value(m_Attribute));
                    }
                    // otherwise, if we have a numeric attribute, are we going
                    // high or low?
                    else if (data.attribute(m_Attribute).isNumeric()) {
                        if (inst.value(m_Attribute) < m_SplitPoint) {
                            child_name = "low";
                        } else {
                            child_name = "high";
                        }
                    }
                }
                m_Successors[i].setM_ClassAssignment((HashMap<String, Integer>) m_ClassAssignment.clone());
                JsonNode son = sons.get(child_name);
                if (son != null) {
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, son,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);
                } else {
                    // if we are a split node with no input children, we need to
                    // add them into the tree
                    // JsonNode split_values = node.get("children");
                    if (kind != null && kind.equals("split_node")) {
                        ArrayNode children = (ArrayNode) node.get("children");
                        if (children == null) {
                            children = mapper.createArrayNode();
                        }
                        ObjectNode child = mapper.createObjectNode();
                        child.put("name", child_name);
                        ObjectNode c_options = mapper.createObjectNode();
                        c_options.put("attribute_name", child_name);
                        c_options.put("kind", "split_value");
                        child.put("options", c_options);
                        children.add(child);
                        _node.put("children", children);
                        m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1,
                                child, attIndex, m_distributionData, requiredInstances, custom_classifiers,
                                cSList, ccService, ds);
                    } else {
                        // for leaf nodes, calling again ends the cycle and
                        // fills up the bins appropriately
                        m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1,
                                node, attIndex, m_distributionData, requiredInstances, custom_classifiers,
                                cSList, ccService, ds);
                    }
                }
            }
            // Compute pct_correct from distributions and send to split_node
            bin_size = Utils.sum(classDist);
            maxIndex = Utils.maxIndex(classDist);
            maxCount = classDist[maxIndex];
            String class_name = m_Info.classAttribute().value(maxIndex);
            _node.put("majClass", class_name);
            errors += bin_size - maxCount;
            pct_correct = (quantity - errors) / quantity;
            evalresults.put("pct_correct", pct_correct);
            // If all successors are non-empty, we don't need to store the class
            // distribution
            boolean emptySuccessor = false;
            for (int i = 0; i < subsets.length; i++) {
                if (m_Successors[i].m_ClassDistribution == null) {
                    emptySuccessor = true;
                    break;
                }
            }
            if (!emptySuccessor) {
                m_ClassDistribution = null;
            }
        } else {
            m_Attribute = -1;
            if (kind != null && kind.equals("leaf_node")) {
                double bin_size = 0, maxCount = 0;
                int maxIndex = 0;
                double errors = 0;
                double pct_correct = 0;
                if (m_ClassDistribution != null) {
                    bin_size = Utils.sum(m_ClassDistribution);
                    // the leaf's class is the majority class of its distribution
                    maxIndex = Utils.maxIndex(m_ClassDistribution);
                    maxCount = m_ClassDistribution[maxIndex];
                    errors = bin_size - maxCount;
                    pct_correct = (bin_size - errors) / bin_size;
                }
                if (node.get("pickInst") != null) {
                    getInstanceData = node.get("pickInst").asBoolean();
                }
                if (Boolean.TRUE.equals(getInstanceData)) {
                    requiredInstances.delete();
                    for (int k = 0; k < data.numInstances(); k++) {
                        requiredInstances.add(data.instance(k));
                    }
                }
                String class_name = m_Info.classAttribute().value(maxIndex);
                _node.put("majClass", class_name);
                if (node.get("setClass") != null) {
                    String setClass = node.get("setClass").asText();
                    class_name = m_Info.classAttribute().value(m_ClassAssignment.get(setClass));
                }
                _node.put("name", class_name);
                evalresults.put("attribute_name", class_name);
                evalresults.put("kind", "leaf_node");
                evalresults.put("bin_size", Utils.doubleToString(bin_size, 2));
                evalresults.put("errors", Utils.doubleToString(errors, 2));
                evalresults.put("pct_correct", Utils.doubleToString(pct_correct, 2));
                this.setJsonnode(_node);
            } else {
                // Make leaf: add the data to the json object
                double bin_size = 0, maxCount = 0;
                int maxIndex = 0;
                double errors = 0;
                double pct_correct = 0;
                if (m_ClassDistribution != null) {
                    bin_size = Utils.sum(m_ClassDistribution);
                    // the leaf's class is the majority class of its distribution
                    maxIndex = Utils.maxIndex(m_ClassDistribution);
                    maxCount = m_ClassDistribution[maxIndex];
                    errors = bin_size - maxCount;
                    pct_correct = (bin_size - errors) / bin_size;
                }
                ArrayNode children = (ArrayNode) node.get("children");
                if (children == null) {
                    children = mapper.createArrayNode();
                }
                ObjectNode child = mapper.createObjectNode();
                String class_name = m_Info.classAttribute().value(maxIndex);
                child.put("majClass", class_name);
                String nodeName = node.get("name").asText();
                if (nodeName.equals("Inside") || nodeName.equals("Outside")) {
                    child.put("setClass", nodeName);
                    class_name = m_Info.classAttribute().value(m_ClassAssignment.get(nodeName));
                }
                child.put("name", class_name);
                ObjectNode c_options = mapper.createObjectNode();
                c_options.put("attribute_name", class_name);
                c_options.put("kind", "leaf_node");
                c_options.put("bin_size", Utils.doubleToString(bin_size, 2));
                c_options.put("errors", Utils.doubleToString(errors, 2));
                c_options.put("pct_correct", Utils.doubleToString(pct_correct, 2));
                child.put("options", c_options);
                children.add(child);
                _node.put("children", children);
                this.setJsonnode(child);
            }
        }
    }
From source file:org.ssase.debt.classification.OnlineMultilayerPerceptron.java
License:Open Source License
    public Instances getInstances(Instance inst) {
        Instances insts;
        FastVector atts = new FastVector();
        for (int i = 0; i < inst.numAttributes(); i++) {
            atts.addElement(inst.attribute(i));
        }
        insts = new Instances("CurrentTrain", atts, 0);
        insts.add(inst);
        insts.setClassIndex(inst.numAttributes() - 1);
        return insts;
    }
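The method above copies each Attribute off the incoming row via attribute(i) to rebuild a one-row dataset, a common workaround for APIs that want an Instances when you only have an Instance. For reference, a minimal sketch of the same pattern on the Weka 3.7+ API, where FastVector is deprecated in favour of ArrayList<Attribute>; the class name here is illustrative, not from the source:

    import java.util.ArrayList;
    import weka.core.Attribute;
    import weka.core.Instance;
    import weka.core.Instances;

    public class SingleRowDataset {
        public static Instances wrap(Instance inst) {
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            for (int i = 0; i < inst.numAttributes(); i++) {
                atts.add(inst.attribute(i));    // copy the header, column by column
            }
            Instances insts = new Instances("CurrentTrain", atts, 1);
            insts.add(inst);                    // add(Instance) shallow-copies the row
            insts.setClassIndex(inst.numAttributes() - 1);
            return insts;
        }
    }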
From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License:Open Source License
    /**
     * Gets the prediction for a given instance based on the current model.
     *
     * @param instance the training instance to classify
     */
    public ClassificationResult getPrediction(TrainingInstance instance) throws NadeefClassifierException {
        // transform training instance into real instance
        Instance wekaInstance = new Instance(numberOfAttributes);
        wekaInstance.setDataset(instances);
        // add values from old tuple
        for (Cell cell : instance.getDirtyTuple().getCells()) {
            if (isPermitted(cell.getColumn())) {
                if (cell.getValue() instanceof String) {
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString());
                } else {
                    double doubleValue = Double.parseDouble(cell.getValue().toString());
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue);
                }
            }
        }
        // add new value, check its type from the dirty value
        if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) {
            wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue());
        } else {
            double doubleValue = Double.parseDouble(instance.getUpdatedValue());
            // NB: as published, the parsed numeric value is never written back; a
            // wekaInstance.setValue(numberOfAttributes - 3, doubleValue) call
            // appears to be missing here.
        }
        // add similarity
        wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore());
        double[] result = getPrediction(wekaInstance);
        // now convert this result into readable form
        ClassificationResult classificationResult = new ClassificationResult(result,
                wekaInstance.attribute(this.numberOfAttributes - 1));
        return classificationResult;
    }
From source file:Reader.KnnClassifier.java
    /**
     * Looks at the k closest known instances to try and guess which letter is in
     * an image.
     *
     * @param instance - The instance to classify
     * @param k - The number of neighbors to look at when determining the class
     * @return A double value representing the letter in the image
     */
    public double classifyInstance(Instance instance, int k) {
        int size = trainingData.numInstances();
        int attributes = trainingData.numAttributes() - 1; // skip the class attribute
        float dist;
        // Keyed by distance, so the map stays sorted; note that two neighbors at
        // exactly the same distance share a key and the later one replaces the earlier.
        Map<Float, Instance> neighbors = new TreeMap<>();
        Instance test;
        for (int i = 0; i < size; i++) {
            dist = 0;
            test = trainingData.instance(i);
            // L1 (Manhattan) distance over all non-class attributes; since both
            // instances share a header, this is equivalent to test.value(j) - instance.value(j)
            for (int j = 0; j < attributes; j++) {
                dist += Math.abs(test.value(test.attribute(j)) - instance.value(test.attribute(j)));
            }
            neighbors.put(dist, test);
        }
        return findMostCommon(neighbors, k);
    }
From source file:regression.logisticRegression.LogisticRegressionCorrect.java
    public void weka(JTextArea output) throws FileNotFoundException, IOException, Exception {
        this.finalPoints = new ArrayList<>();
        BufferedReader reader = new BufferedReader(new FileReader("weka.arff"));
        Instances instances = new Instances(reader);
        instances.setClassIndex(instances.numAttributes() - 1);
        String[] options = new String[4];
        options[0] = "-R";
        options[1] = "1.0E-8";
        options[2] = "-M";
        options[3] = "-1";
        logistic.setOptions(options);
        logistic.buildClassifier(instances);
        for (int i = 0; i < instances.numInstances(); i++) {
            weka.core.Instance inst = instances.instance(i);
            Double classifiedClass = 1.0;
            if (logistic.classifyInstance(inst) == 1.0) {
                classifiedClass = 0.0;
            }
            System.out.println("classify: " + inst.attribute(0) + "|" + inst.value(0) + "->" + classifiedClass);
            double[] distributions = logistic.distributionForInstance(inst);
            output.append("For x= " + inst.value(0) + " the probability of the event occurring is: "
                    + distributions[0] + ", so it belongs to class: " + classifiedClass + "\n");
            this.finalPoints.add(new Point(inst.value(0), classifiedClass));
            this.finalProbPoints.add(new Point(inst.value(0), distributions[0]));
            for (int j = 0; j < distributions.length; j++) {
                System.out.println("distribution: " + inst.value(0) + "->" + distributions[j]);
            }
        }
        // evaluate classifier and print some statistics
        Evaluation eval = new Evaluation(instances);
        eval.evaluateModel(logistic, instances);
        FastVector pred = eval.predictions();
        for (int i = 0; i < eval.predictions().size(); i++) {
            // loop body empty in the source
        }
        System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    }
From source file:regression.logisticRegression.LogisticRegressionCorrect.java
    public void singleTest(Instances instances, JTextArea output) throws Exception {
        for (int i = 0; i < instances.numInstances(); i++) {
            weka.core.Instance inst = instances.instance(i);
            Double classifiedClass = 1.0;
            if (logistic.classifyInstance(inst) == 1.0) {
                classifiedClass = 0.0;
            }
            System.out.println("classify: " + inst.attribute(0) + "|" + inst.value(0) + "->" + classifiedClass);
            double[] distributions = logistic.distributionForInstance(inst);
            output.append("For x= " + inst.value(0) + " the probability of the event occurring is: "
                    + distributions[0] + ", so it belongs to class: " + classifiedClass + "\n");
        }
    }
From source file:script.OperationsHandler.java
    private String getCreditStatus(Instance record) {
        // decode the nominal status cell: value(i) holds the label's index
        return record.attribute(STATUS_ATTRIBUTE_NUMBER).value((int) record.value(STATUS_ATTRIBUTE_NUMBER));
    }
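This one-liner is the standard idiom for decoding a nominal cell: value(i) stores the label's index as a double, and attribute(i).value((int) ...) maps the index back to its string label. A self-contained illustration follows; the position of STATUS_ATTRIBUTE_NUMBER and the "good"/"bad" labels are invented for the sketch, not taken from the source:

    import java.util.ArrayList;
    import java.util.Arrays;
    import weka.core.Attribute;
    import weka.core.DenseInstance;
    import weka.core.Instance;
    import weka.core.Instances;

    public class NominalDecode {
        static final int STATUS_ATTRIBUTE_NUMBER = 0; // assumed position, for illustration

        static String getCreditStatus(Instance record) {
            // label index stored in the cell -> human-readable label
            return record.attribute(STATUS_ATTRIBUTE_NUMBER)
                    .value((int) record.value(STATUS_ATTRIBUTE_NUMBER));
        }

        public static void main(String[] args) {
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            atts.add(new Attribute("status", new ArrayList<String>(Arrays.asList("good", "bad"))));
            Instances data = new Instances("credit", atts, 1);
            Instance rec = new DenseInstance(1);
            rec.setDataset(data);
            rec.setValue(0, "bad");
            System.out.println(getCreditStatus(rec)); // prints "bad"
        }
    }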
From source file:sirius.nnsearcher.main.Constraints.java
License:Open Source License
    public boolean isViolated(FastaFormat fastaFormat, weka.core.Instance instance,
            ApplicationData applicationData) {
        double attributeValue;
        if (instance == null) {
            // attribute not found in Instance - generate it
            attributeValue = GenerateFeatures.getValue(fastaFormat, this.featureData, applicationData);
        } else {
            if (this.index == -1) {
                // find the index of the attribute in the instance
                findIndex(instance);
            } else if (instance.attribute(this.index).name().compareTo(this.attributeName.name()) != 0) {
                findIndex(instance);
            }
            if (this.index == -1) {
                // attribute not found in Instance - generate it
                attributeValue = GenerateFeatures.getValue(fastaFormat, this.featureData, applicationData);
            } else {
                attributeValue = instance.value(this.index);
            }
        }
        // Then check whether it violates the constraint. Operator codes:
        // 0) >=  1) >  2) ==  3) !=  4) <=  5) <
        boolean violated = true;
        switch (operator) {
        case 0:
            if (attributeValue >= this.value)
                violated = false;
            break;
        case 1:
            if (attributeValue > this.value)
                violated = false;
            break;
        case 2:
            if (attributeValue == this.value)
                violated = false;
            break;
        case 3:
            if (attributeValue != this.value)
                violated = false;
            break;
        case 4:
            if (attributeValue <= this.value)
                violated = false;
            break;
        case 5:
            if (attributeValue < this.value)
                violated = false;
            break;
        default:
            throw new Error("Unknown Operator");
        }
        return violated;
    }
From source file:swm.project.mappings.OurDistance.java
    @Override
    public double distance(Instance instnc, Instance instnc1) {
        int num = instnc.numAttributes();
        List<Double> movieClusterRating1 = new ArrayList<Double>(),
                movieClusterRating2 = new ArrayList<Double>();
        Attribute id = instnc.attribute(0);
        // NB: as published this is a stub - the loop body is empty, the rating
        // lists and id are never used, and the method always returns 1.
        for (int index = 1; index < num; index++) {
        }
        return 1;
    }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
    protected void selfTrain(Instance testInst) {
        int maxInstances = this.maxInstancesOption.getValue();
        int poolSizeRatio = poolSizeOption.getValue();
        int poolLimit = maxInstances / poolSizeRatio;
        int poolCount = 0;
        VotedInstancePool vInstPool = SelfOzaBoostID.getVotedInstancePool();
        noOfClassesInPool = vInstPool.getNoOfClasses();
        System.out.println("No of instances in the pool: " + vInstPool.getSize());
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        if (vInstPool.getSize() > 10) {
            ArrayList<Attribute> attrs = new ArrayList<Attribute>();
            for (int i = 0; i < testInst.numAttributes(); i++) {
                attrs.add(testInst.attribute(i));
            }
            Instances instances = new Instances("instances", attrs, vInstPool.getSize());
            Iterator instanceIt = vInstPool.iterator();
            System.out.println("Size of pool: " + vInstPool.getSize());
            while (instanceIt.hasNext() && poolCount < poolLimit) {
                VotedInstance vInstance = (VotedInstance) instanceIt.next();
                instances.add(vInstance.getInstance());
                poolCount++;
            }
            System.out.println("Size of instances: " + instances.size());
            instances = clusterInstances(instances);
            InstanceStream activeStream = new CachedInstancesStream(instances);
            System.out.println("Self-training has started");
            System.out.println("Number of self-training instances: " + instances.numInstances());
            long treeSize = vInstPool.getSize();
            long limit = treeSize / SAMPLING_LIMIT;
            Instance inst = null;
            for (long j = 0; j < limit && activeStream.hasMoreInstances(); j++) {
                inst = activeStream.nextInstance();
                if (inst.numAttributes() == attrs.size()) {
                    model.trainOnInstance(inst);
                }
            }
        }
    }