List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
From source file:org.pentaho.di.scoring.WekaScoringMeta.java
License:Open Source License
/** * Generates row meta data to represent the fields output by this step * * @param row the meta data for the output produced * @param origin the name of the step to be used as the origin * @param info The input rows metadata that enters the step through the * specified channels in the same order as in method getInfoSteps(). * The step metadata can then choose what to do with it: ignore it or * not./*from w w w . jav a2s . c o m*/ * @param nextStep if this is a non-null value, it's the next step in the * transformation. The one who's asking, the step where the data is * targetted towards. * @param space not sure what this is :-) * @throws KettleStepException if an error occurs */ @Override public void getFields(RowMetaInterface row, String origin, RowMetaInterface[] info, StepMeta nextStep, VariableSpace space) throws KettleStepException { if (m_model == null && !Const.isEmpty(getSerializedModelFileName())) { // see if we can load from a file. String modName = getSerializedModelFileName(); // if (!modelFile.exists()) { try { if (!WekaScoringData.modelFileExists(modName, space)) { throw new KettleStepException( BaseMessages.getString(PKG, "WekaScoring.Error.NonExistentModelFile")); //$NON-NLS-1$ } WekaScoringModel model = WekaScoringData.loadSerializedModel(m_modelFileName, getLog(), space); setModel(model); } catch (Exception ex) { throw new KettleStepException( BaseMessages.getString(PKG, "WekaScoring.Error.ProblemDeserializingModel"), ex); //$NON-NLS-1$ } } if (m_model != null) { Instances header = m_model.getHeader(); String classAttName = null; boolean supervised = m_model.isSupervisedLearningModel(); if (supervised) { classAttName = header.classAttribute().name(); if (header.classAttribute().isNumeric() || !m_outputProbabilities) { int valueType = (header.classAttribute().isNumeric()) ? 
ValueMetaInterface.TYPE_NUMBER : ValueMetaInterface.TYPE_STRING; ValueMetaInterface newVM = new ValueMeta(classAttName + "_predicted", //$NON-NLS-1$ valueType); newVM.setOrigin(origin); row.addValueMeta(newVM); } else { for (int i = 0; i < header.classAttribute().numValues(); i++) { String classVal = header.classAttribute().value(i); ValueMetaInterface newVM = new ValueMeta(classAttName + ":" //$NON-NLS-1$ + classVal + "_predicted_prob", ValueMetaInterface.TYPE_NUMBER); //$NON-NLS-1$ newVM.setOrigin(origin); row.addValueMeta(newVM); } } } else { if (m_outputProbabilities) { try { int numClusters = ((WekaScoringClusterer) m_model).numberOfClusters(); for (int i = 0; i < numClusters; i++) { ValueMetaInterface newVM = new ValueMeta("cluster_" + i //$NON-NLS-1$ + "_predicted_prob", ValueMetaInterface.TYPE_NUMBER); //$NON-NLS-1$ newVM.setOrigin(origin); row.addValueMeta(newVM); } } catch (Exception ex) { throw new KettleStepException( BaseMessages.getString(PKG, "WekaScoringMeta.Error.UnableToGetNumberOfClusters"), //$NON-NLS-1$ ex); } } else { ValueMetaInterface newVM = new ValueMeta("cluster#_predicted", //$NON-NLS-1$ ValueMetaInterface.TYPE_NUMBER); newVM.setOrigin(origin); row.addValueMeta(newVM); } } } }
From source file:org.ssase.debt.classification.OnlineMultilayerPerceptron.java
License:Open Source License
private Instances setPreNormalizedRegressionClassType(Instances inst) throws Exception { if (inst != null) { // x bounds double min = Double.POSITIVE_INFINITY; double max = Double.NEGATIVE_INFINITY; m_attributeRanges = new double[inst.numAttributes()]; m_attributeBases = new double[inst.numAttributes()]; for (int noa = 0; noa < inst.numAttributes(); noa++) { min = 0;/*from w w w. j a v a 2 s. co m*/ max = 1; m_attributeRanges[noa] = (max - min) / 2; m_attributeBases[noa] = (max + min) / 2; } if (inst.classAttribute().isNumeric()) { m_numeric = true; } else { m_numeric = false; } } return inst; }
From source file:org.wkwk.classifier.MyC45.java
public void makeTree(Instances data) throws Exception { if (data.numInstances() == 0) { splitAttribute = null;/*from w w w .j a v a 2 s.c o m*/ } // Calculate information gain for all attributes, except class attribute double[] infoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes() - 1; i++) { Attribute m_attr = data.attribute(i); if (m_attr.isNominal()) { infoGains[i] = computeInfoGain(data, data.attribute(i)); } else if (m_attr.isNumeric()) { infoGains[i] = computeInfoGainCont(data, data.attribute(i), bestThreshold(data, m_attr)); } } splitAttribute = data.attribute(Utils.maxIndex(infoGains)); if (splitAttribute.isNumeric()) { attrThreshold = bestThreshold(data, splitAttribute); } if (Utils.eq(infoGains[splitAttribute.index()], 0)) { splitAttribute = null; classDistribution = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { int inst = (int) data.instance(i).value(data.classAttribute()); classDistribution[inst]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { Instances[] splitData = null; if (splitAttribute.isNominal()) { splitData = splitData(data, splitAttribute); } else if (splitAttribute.isNumeric()) { splitData = splitDataCont(data, splitAttribute, attrThreshold); } if (splitAttribute.isNominal()) { successors = new MyC45[splitAttribute.numValues()]; for (int i = 0; i < splitAttribute.numValues(); i++) { successors[i] = new MyC45(); successors[i].makeTree(splitData[i]); } } else if (splitAttribute.isNumeric()) { successors = new MyC45[2]; for (int i = 0; i < 2; i++) { successors[i] = new MyC45(); successors[i].makeTree(splitData[i]); } } } if (isPruned) { data = prune(data); } }
From source file:reactivetechnologies.sentigrade.dto.VectorRequestData.java
License:Apache License
/**
 * Collects the nominal values declared for the class attribute of the given
 * dataset.
 *
 * @param texts the instances whose class attribute is inspected
 * @return the class attribute's values in declaration order, or an empty list
 *         when the class attribute is not nominal
 */
public static List<String> classAttrNominals(Instances texts) {
    List<String> nominalValues = new LinkedList<>();
    Attribute classAttr = texts.classAttribute();
    if (!classAttr.isNominal()) {
        // Numeric (or other non-nominal) class: no discrete options to report.
        return nominalValues;
    }
    for (Object value : Collections.list(classAttr.enumerateValues())) {
        nominalValues.add(value.toString());
    }
    return nominalValues;
}
From source file:recsys.EvaluationMachineLearning.java
/**
 * Scores a test ARFF file with a previously serialized FilteredClassifier and
 * writes per-session predicted "buy" item lists to a solution file.
 *
 * Rows are assumed grouped by session id (attribute 0); attribute 1 is the
 * item id and attribute 4 the session item count — TODO confirm against the
 * feature-extraction code that produced the ARFF. A session's buy list is
 * emitted only when it passes the size filter (2-3 items: exactly one buy;
 * >=4 items: at least two buys).
 */
public static void main(String args[]) throws Exception {
    // own_training toggles between two hard-coded path sets (1 = D: paths).
    int own_training = 0;
    //opening the testing file
    DataSource sourceTest;
    if (own_training == 1) {
        sourceTest = new DataSource("D://own_training//item//feature data//test_feature.arff");
    } else {
        sourceTest = new DataSource("E://test_featureFile.arff");
    }
    //DataSource sourceTest = new DataSource("D://own_training//test_featureFile.arff");
    //System.out.println("working");
    Instances test = sourceTest.getDataSet();
    PrintFile solutionFile;
    if (own_training == 1) {
        solutionFile = new PrintFile(null, new File("D://own_training//item//solution//solution.dat"));
    } else {
        solutionFile = new PrintFile(null, new File("E://solution.dat"));
    }
    //PrintFile solutionFile = new PrintFile(null, new File("D://own_training//solution.dat"));
    // Default the class attribute to the last column when unset.
    if (test.classIndex() == -1) {
        test.setClassIndex(test.numAttributes() - 1);
    }
    //System.out.println("hello");
    ObjectInputStream ois;
    if (own_training == 1) {
        ois = new ObjectInputStream(new FileInputStream("D://own_training//item//model//train.model"));
    } else {
        ois = new ObjectInputStream(new FileInputStream("E://naive_bayes.model"));
    }
    //System.out.println("hello");
    // NOTE(review): 'rm' is configured but never attached to the classifier
    // (fc.setFilter is commented out), and the second setAttributeIndices call
    // overwrites the first ("1" is replaced by "2") — dead/suspect code.
    Remove rm = new Remove();
    rm.setAttributeIndices("1");
    rm.setAttributeIndices("2");
    //rm.setAttributeIndices("6");
    //rm.setAttributeIndices("5");
    //NaiveBayes cls = (NaiveBayes) ois.readObject();
    FilteredClassifier fc = (FilteredClassifier) ois.readObject();
    //fc.setFilter(rm);
    //fc.setClassifier(cls);
    ois.close();
    int totalSessionCount = 0;
    int buySessionCount = 0;
    // Session state seeded from the first row.
    Integer tempSessionId = (int) test.instance(0).value(0);
    int sessionItemCount = (int) test.instance(0).value(4);
    ArrayList<Integer> buy = new ArrayList<>();
    String result = String.valueOf(tempSessionId) + ";";
    int count = 0; // NOTE(review): unused
    for (int i = 0; i < test.numInstances(); i++) {
        //System.out.println(i);
        //System.out.print("ID: " + test.instance(i).value(0));
        //if a new session occurs
        //sessionItemCount++;
        // Session boundary: flush the previous session's buys if they pass
        // the size filter, then reset state for the new session.
        if ((int) test.instance(i).value(0) != tempSessionId) {
            totalSessionCount++;
            if (buy.size() > 0) {
                if (sessionItemCount != 1) {
                    if (sessionItemCount >= 2 && sessionItemCount <= 3) {
                        if (buy.size() == 1) {
                            for (int j = 0; j < buy.size(); j++) {
                                result += buy.get(j) + ",";
                            }
                            // Trim the trailing comma before writing.
                            solutionFile.writeFile(result.substring(0, result.length() - 1));
                            buySessionCount++;
                        }
                    } else if (sessionItemCount >= 4) {
                        if (buy.size() >= 2) {
                            for (int j = 0; j < buy.size(); j++) {
                                result += buy.get(j) + ",";
                            }
                            solutionFile.writeFile(result.substring(0, result.length() - 1));
                            buySessionCount++;
                        }
                    }
                }
            }
            tempSessionId = (int) test.instance(i).value(0);
            sessionItemCount = (int) test.instance(i).value(4);
            //System.out.println(tempSessionId + "," + sessionItemCount);
            result = String.valueOf(tempSessionId) + ";";
            buy.clear();
        }
        // Classify the row; collect the item when the predicted label is "buy".
        double pred = fc.classifyInstance(test.instance(i));
        if (test.classAttribute().value((int) pred).equals("buy")) {
            Integer item = (int) test.instance(i).value(1);
            buy.add(item);
        }
        //System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
        //System.out.println(", predicted: " + test.classAttribute().value((int) pred));
    }
    System.out.println(buySessionCount);
    System.out.println(totalSessionCount);
    // NOTE(review): the final session is written without the size filter
    // applied in the loop above — confirm whether that is intended.
    if (buy.size() > 0) {
        solutionFile.writeFile(result.substring(0, result.length() - 1));
    }
    solutionFile.closeFile();
}
From source file:recsys.ResultProcessing.java
/**
 * Scores a test ARFF file with a serialized RandomForest and writes predicted
 * "buy" item lists, but only for sessions present in a previously computed
 * session-level solution file.
 *
 * Rows are assumed grouped by session id (attribute 0) with the item id in
 * attribute 1 — TODO confirm against the feature-extraction code. Attributes
 * 1, 2 and 4 are removed before classification to match the training filter.
 */
public static void main(String args[]) throws Exception {
    // own_training toggles between two hard-coded path sets (1 = D: paths).
    int own_training = StaticVariables.own_training;
    //opening the testing file
    DataSource sourceTest;
    if (own_training == 1) {
        sourceTest = new DataSource("D://own_training//item//feature data//test_feature.arff");
    } else {
        sourceTest = new DataSource("E://recsys//item//feature data//test_feature.arff");
    }
    //DataSource sourceTest = new DataSource("D://own_training//test_featureFile.arff");
    //System.out.println("working");
    Instances test = sourceTest.getDataSet();
    PrintFile solutionFile;
    if (own_training == 1) {
        solutionFile = new PrintFile(null, new File("D://own_training//item//solution//solution.dat"));
    } else {
        solutionFile = new PrintFile(null, new File("E://solution.dat"));
    }
    //PrintFile solutionFile = new PrintFile(null, new File("D://own_training//solution.dat"));
    // Default the class attribute to the last column when unset.
    if (test.classIndex() == -1) {
        test.setClassIndex(test.numAttributes() - 1);
    }
    //System.out.println("hello");
    ObjectInputStream ois;
    if (own_training == 1) {
        ois = new ObjectInputStream(new FileInputStream("D://own_training//item//model//train.model"));
    } else {
        ois = new ObjectInputStream(new FileInputStream("E:\\recsys\\item\\model\\train.model"));
        //sois = new ObjectInputStream(new FileInputStream("E:\\recsys\\my best performances\\39127.6\\train.model"));
    }
    //AdaBoostM1 cls = (AdaBoostM1)ois.readObject();
    //BayesNet cls = (BayesNet)ois .readObject();
    RandomForest cls = (RandomForest) ois.readObject();
    //Logistic cls = (Logistic) ois.readObject();
    //System.out.println(cls.globalInfo());
    //System.out.println(cls.getNumFeatures());
    //System.out.println(cls.toString());
    //BayesianLogisticRegression cls = (BayesianLogisticRegression)ois.readObject();
    //NaiveBayes cls = (NaiveBayes) ois.readObject();
    //FilteredClassifier fc = (FilteredClassifier) ois.readObject();
    System.out.println(cls.toString());
    ois.close();
    // Drop attributes 1, 2 and 4 (id-like columns) before classification.
    String[] options = new String[2];
    options[0] = "-R"; // "range"
    options[1] = "1,2,4"; // first attribute
    //options[2] = "2";
    //options[3] = "4";
    Remove remove = new Remove(); // new instance of filter
    remove.setOptions(options); // set options
    remove.setInputFormat(test); // inform filter about dataset **AFTER** setting options
    Instances newData = Filter.useFilter(test, remove); // apply filter
    System.out.println(newData.firstInstance());
    int totalSessionCount = 0;
    int buySessionCount = 0;
    int b = 0; // count of rows predicted "buy"
    // Load the session-level solution: only sessions listed there are kept.
    Scanner sc;
    if (own_training == 0)
        sc = new Scanner(new File("E:\\recsys\\session\\solution\\solution.dat"));
        //sc = new Scanner(new File("E:\\recsys\\my best performances\\best performance\\solution_session.dat"));
    else
        sc = new Scanner(new File("D:\\own_training\\session\\solution\\solution.dat"));
        //sc = new Scanner(new File("D:\\own_training\\session\\data\\original_solution.csv"));
    // Map of allowed session ids (value is unused; used as a set).
    HashMap<Integer, Integer> a = new HashMap<Integer, Integer>();
    while (sc.hasNext()) {
        String temp = sc.next();
        StringTokenizer st = new StringTokenizer(temp, ",;");
        a.put(Integer.parseInt(st.nextToken()), 1);
    }
    System.out.println("size " + a.size());
    // Session state seeded from the first row.
    Integer tempSessionId = (int) test.instance(0).value(0);
    ArrayList<Integer> buy = new ArrayList<>();
    String result = String.valueOf(tempSessionId) + ";";
    //int lengthVector[] = new int[300];
    int testSessionCount = 0, currentSessionLength = 0; // NOTE(review): unused
    //int sessionLengthCount=0;
    for (int i = 0; i < test.numInstances(); i++) {
        // Session boundary: flush the previous session's buys if the session
        // id appears in the session-level solution, then reset state.
        if ((int) test.instance(i).value(0) != tempSessionId) {
            if (a.containsKey(tempSessionId)) {
                //if(test.instance(i-1).value(3)< StaticVariables.length) {
                //System.out.println(test.instance(i-1).value(3));
                totalSessionCount++;
                if (buy.size() > 0) {
                    for (int j = 0; j < buy.size(); j++) {
                        result += buy.get(j) + ",";
                    }
                    // Trim the trailing comma before writing.
                    solutionFile.writeFile(result.substring(0, result.length() - 1));
                }
                //lengthVector[sessionLengthCount]++;
            }
            /*}else{ if(buy.size()>= 3){ for (int j = 0; j < buy.size(); j++) { result += buy.get(j) + ","; } solutionFile.writeFile(result.substring(0, result.length() - 1)); } }*/
            //testSessionCount=0;
            tempSessionId = (int) test.instance(i).value(0);
            result = String.valueOf(tempSessionId) + ";";
            //sessionLengthCount=0;
            buy.clear();
        }
        //currentSessionLength = test.instance(i).value(3);
        //testSessionCount++;
        //System.out.println("working");
        //sessionLengthCount++;
        // Classify the filtered row (newData is row-aligned with test);
        // collect the item when the predicted label is "buy".
        double pred = cls.classifyInstance(newData.instance(i));
        if (test.classAttribute().value((int) pred).equals("buy")) {
            b++;
            Integer item = (int) test.instance(i).value(1);
            buy.add(item);
        }
        //System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
        //System.out.println(", predicted: " + test.classAttribute().value((int) pred));
    }
    System.out.println(totalSessionCount);
    //System.out.println(totalSessionCount);
    //System.out.println(b);
    // NOTE(review): the final session is written without the containsKey
    // check applied in the loop above — confirm whether that is intended.
    if (buy.size() > 0) {
        solutionFile.writeFile(result.substring(0, result.length() - 1));
    }
    /*for(int p:lengthVector) System.out.println(p);*/
    solutionFile.closeFile();
}
From source file:sentinets.Prediction.java
License:Open Source License
/**
 * Incrementally trains the wrapped SGD classifier on newly loaded instances
 * and reports evaluation results before, during (per cross-validation fold)
 * and after the update. The updated model is serialized to the output
 * directory.
 *
 * @param inputFile path of the file with new instances; loaded via setInstances()
 * @param metrics out-parameter: one {fMeasure(0), fMeasure(1), weightedFMeasure}
 *        triple is appended per evaluation performed
 * @return a human-readable evaluation report (empty sections on failure —
 *         exceptions are logged, not rethrown)
 */
public String updateModel(String inputFile, ArrayList<Double[]> metrics) {
    // FIX: use StringBuilder instead of repeated String += inside the fold
    // loop (accidental O(n^2) concatenation).
    StringBuilder output = new StringBuilder();
    this.setInstances(inputFile);
    FilteredClassifier fcls = (FilteredClassifier) this.cls;
    // FIX: renamed local from 'cls' to avoid shadowing the field this.cls.
    SGD sgd = (SGD) fcls.getClassifier();
    Filter filter = fcls.getFilter();
    Instances insAll;
    try {
        insAll = Filter.useFilter(this.unlabled, filter);
        if (insAll.size() > 0) {
            Random rand = new Random(10); // fixed seed => reproducible folds
            int folds = 10 > insAll.size() ? 2 : 10;
            Instances randData = new Instances(insAll);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal()) {
                randData.stratify(folds);
            }
            Evaluation eval = new Evaluation(randData);
            // Baseline evaluation before any incremental updates.
            eval.evaluateModel(sgd, insAll);
            System.out.println("Initial Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            output.append("\n====").append("Initial Evaluation").append("====\n");
            output.append("\n").append(eval.toSummaryString());
            output.append("\n").append(eval.toClassDetailsString());
            System.out.println("Cross Validated Evaluation");
            output.append("\n====").append("Cross Validated Evaluation").append("====\n");
            // Train incrementally on each fold's training split, evaluate on
            // its test split.
            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);
                for (int i = 0; i < train.numInstances(); i++) {
                    sgd.updateClassifier(train.instance(i));
                }
                eval.evaluateModel(sgd, test);
                System.out.println("Cross Validated Evaluation fold: " + n);
                output.append("\n====Cross Validated Evaluation fold (").append(n).append(")====\n");
                System.out.println(eval.toSummaryString());
                System.out.println(eval.toClassDetailsString());
                output.append("\n").append(eval.toSummaryString());
                output.append("\n").append(eval.toClassDetailsString());
                metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            }
            // Final pass over all instances, then evaluate on the same data.
            for (int i = 0; i < insAll.numInstances(); i++) {
                sgd.updateClassifier(insAll.instance(i));
            }
            eval.evaluateModel(sgd, insAll);
            System.out.println("Final Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            output.append("\n====").append("Final Evaluation").append("====\n");
            output.append("\n").append(eval.toSummaryString());
            output.append("\n").append(eval.toClassDetailsString());
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            fcls.setClassifier(sgd);
            String modelFilePath = outputDir + "/" + Utils.getOutDir(Utils.OutDirIndex.MODELS)
                    + "/updatedClassifier.model";
            weka.core.SerializationHelper.write(modelFilePath, fcls);
            output.append("\n").append("Updated Model saved at: ").append(modelFilePath);
        } else {
            output.append("No new instances for training the model.");
        }
    } catch (Exception e) {
        // NOTE(review): errors are only logged; callers receive a partial report.
        e.printStackTrace();
    }
    return output.toString();
}
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License:Open Source License
/**
 * Builds the one-vs-rest multi-class SVM ensemble.
 *
 * With a single class no classifiers are built (ZeroR handles prediction);
 * with two classes one binary classifier is trained on the filtered data;
 * with more classes one classifier per class is trained on per-class
 * binarized instances produced by genInstances(). A ZeroR baseline is always
 * built first.
 */
@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }
    // Baseline model; also remember the class attribute and output format.
    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);
    this.m_ClassAttribute = p_Instances.classAttribute();
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();
    switch (numClassifiers) {
    case 1:
        // Degenerate case: a single class needs no classifier at all.
        this.m_Classifiers = null;
        break;
    case 2:
        // Binary problem: one classifier trained on the filtered instances.
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        // One-vs-rest: one classifier per class value.
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        // When an ID attribute is configured, group the class values seen per
        // ID so genInstances() can label all instances of an ID consistently.
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License:Open Source License
/** * get output format//from www . j a v a2 s .c o m * * @param p_Instances * input format */ protected void getOutputFormat(Instances p_Instances) { FastVector newAtts, newVals; // Compute new attributes newAtts = new FastVector(p_Instances.numAttributes()); for (int j = 0; j < p_Instances.numAttributes(); j++) { Attribute att = p_Instances.attribute(j); if (j != p_Instances.classIndex()) { newAtts.addElement(att.copy()); } else { if (p_Instances.classAttribute().isNumeric()) { newAtts.addElement(new Attribute(att.name())); } else { newVals = new FastVector(2); newVals.addElement("negative"); newVals.addElement("positive"); newAtts.addElement(new Attribute(att.name(), newVals)); } } } // Construct new header this.m_OutputFormat = new Instances(p_Instances.relationName(), newAtts, 0); this.m_OutputFormat.setClassIndex(p_Instances.classIndex()); if (this.m_IndexOfID >= 0) { this.m_OutputFormat.deleteAttributeAt(this.m_IndexOfID); } }
From source file:sg.edu.nus.comp.nlp.ims.classifiers.CWekaEvaluator.java
License:Open Source License
/**
 * Evaluates all instances of a lexelt and returns a CResultInfo with the
 * per-instance sense probabilities.
 *
 * Three situations are distinguished: no statistic and a sense index
 * available => every instance gets the first sense (probability 1); exactly
 * one known tag => every instance gets that tag; otherwise the stored
 * classifier for the lexelt is applied.
 */
@Override
public Object evaluate(Object p_Lexelt) throws Exception {
    ILexelt lexelt = (ILexelt) p_Lexelt;
    String lexeltID = lexelt.getID();
    IStatistic stat = (IStatistic) this.getStatistic(lexeltID);
    // type: 1 = single forced sense, >1 = run the classifier.
    int type = 2;
    String firstSense = this.m_UnknownSense;
    if (stat == null) {
        type = 1;
        if (this.m_SenseIndex != null) {
            String first = this.m_SenseIndex.getFirstSense(lexeltID);
            if (first != null) {
                firstSense = first;
            }
        }
    } else {
        if (stat.getTags().size() == 1) {
            type = 1;
            firstSense = stat.getTags().iterator().next();
        } else {
            type = stat.getTags().size();
        }
    }
    int classIdx = this.m_ClassIndex;
    CResultInfo retVal = new CResultInfo();
    switch (type) {
    case 0:
        throw new Exception("no tag for lexelt " + lexeltID + ".");
    case 1:
        // Single-sense shortcut: assign firstSense with probability 1 to all.
        retVal.lexelt = lexelt.getID();
        retVal.docs = new String[lexelt.size()];
        retVal.ids = new String[lexelt.size()];
        retVal.classes = new String[] { firstSense };
        retVal.probabilities = new double[lexelt.size()][1];
        for (int i = 0; i < retVal.probabilities.length; i++) {
            retVal.probabilities[i][0] = 1;
            retVal.docs[i] = lexelt.getInstanceDocID(i);
            retVal.ids[i] = lexelt.getInstanceID(i);
        }
        break;
    default:
        // Real classification: convert the lexelt to weka instances and apply
        // the stored model.
        lexelt.setStatistic(stat);
        Classifier classifier = (Classifier) this.getModel(lexeltID);
        ILexeltWriter lexeltWriter = new CWekaSparseLexeltWriter();
        Instances instances = (Instances) lexeltWriter.getInstances(lexelt);
        // Default the class attribute to the last column when unconfigured.
        if (classIdx < 0) {
            classIdx = instances.numAttributes() - 1;
        }
        instances.setClassIndex(classIdx);
        retVal.lexelt = lexelt.getID();
        retVal.docs = new String[lexelt.size()];
        retVal.ids = new String[lexelt.size()];
        retVal.probabilities = new double[instances.numInstances()][];
        retVal.classes = new String[instances.classAttribute().numValues()];
        for (int i = 0; i < instances.classAttribute().numValues(); i++) {
            retVal.classes[i] = instances.classAttribute().value(i);
        }
        // NOTE(review): this branch looks suspect — for a truly numeric class
        // attribute, numValues() is 0, so 'classes' would be empty and the
        // indexed write below would fail. It may be intended as "hard
        // classification vs. probability distribution" and the condition
        // inverted or specialized elsewhere — confirm against callers.
        if (instances.classAttribute().isNumeric()) {
            // Hard assignment: probability 1 at the predicted class index.
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.instance(i);
                retVal.docs[i] = lexelt.getInstanceDocID(i);
                retVal.ids[i] = lexelt.getInstanceID(i);
                retVal.probabilities[i] = new double[retVal.classes.length];
                retVal.probabilities[i][(int) classifier.classifyInstance(instance)] = 1;
            }
        } else {
            // Full probability distribution per instance.
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.instance(i);
                retVal.docs[i] = lexelt.getInstanceDocID(i);
                retVal.ids[i] = lexelt.getInstanceID(i);
                retVal.probabilities[i] = classifier.distributionForInstance(instance);
            }
        }
    }
    return retVal;
}