List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:cba.ItemSet.java
License:Open Source License
/** * Returns the contents of an item set as a string. * * @param instances contains the relevant header information * @return string describing the item set *///from w ww . j ava 2s .c o m public String toString(Instances instances) { StringBuffer text = new StringBuffer(); for (int i = 0; i < instances.numAttributes(); i++) if (m_items[i] != -1) { text.append(instances.attribute(i).name() + '='); text.append(instances.attribute(i).value(m_items[i]) + ' '); } text.append(m_counter); return text.toString(); }
From source file:CEP.GenerateStream.java
/**
 * Streams the w4ndata ARFF file to the CEP runtime one instance at a time.
 *
 * <p>The file location is hard-coded; a second location is used when the working
 * directory contains {@code gs023850}. The header is handed to
 * {@code HeaderManager.SetStructure} before streaming starts. For every instance the
 * value of attribute 0 is read as the timestamp string, the instance is sent through
 * {@code cepRT.sendEvent}, and {@code WaitTime} is called; its return value is carried
 * forward as the previous timestamp.
 *
 * <p>Failures are reported on standard output and never propagate to the caller.
 */
public void MakeStream() {
    File file = new File("C:\\Users\\Weary\\Documents\\w4ndata\\w4ndata.arff");
    String workingDir = System.getProperty("user.dir");
    if (workingDir.contains("gs023850")) {
        file = new File("C:\\Users\\gs023850\\Documents\\w4ndata\\w4ndata.arff");
    }
    try {
        ArffLoader loader = new ArffLoader();
        loader.setFile(file);
        Instances structure = loader.getStructure();
        HeaderManager.SetStructure(new Instances(structure));
        Instance current;
        long previousTimeStamp = 0;
        long wait = 0;
        while ((current = loader.getNextInstance(structure)) != null) {
            String timeStamp = current.stringValue(0);
            cepRT.sendEvent(current);
            System.out.println("Sending event");
            previousTimeStamp = WaitTime(timeStamp, previousTimeStamp, wait);
        }
    } catch (FileNotFoundException e) {
        // Must precede IOException: FileNotFoundException is a subclass of it.
        // (The previous e.equals(new FileNotFoundException()) test could never be true,
        // because exceptions compare by identity.)
        System.out.println("File not found - could not generate stream");
    } catch (IOException e) {
        System.out.println("Unable to read file");
    } catch (NumberFormatException e) {
        System.out.println("Unable to convert to time to number - bad time");
    } catch (Exception e) {
        System.out.println(e.toString());
    }
}
From source file:CEP.HeaderManager.java
/**
 * Stores the given header in {@code ArffStructure}, sets its class index to the last
 * attribute, and then clears the {@code lock} flag.
 *
 * @param structure the header to store; its class index is modified in place
 */
static void SetStructure(Instances structure) {
    ArffStructure = structure;
    final int lastAttributeIndex = structure.numAttributes() - 1;
    structure.setClassIndex(lastAttributeIndex);
    lock = false;
}
From source file:cerebro.Id3.java
License:Open Source License
/** * Method for building an Id3 tree.//from w w w . ja va 2 s.c o m * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:cezeri.feature.selection.FeatureSelectionInfluence.java
public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) { Influence[] ret = null;//from ww w.ja v a 2s . c om try { Instances data = DataSource.read(filePath); ret = new Influence[data.numAttributes() - 1]; data.setClassIndex(data.numAttributes() - 1); // other options int seed = 1; int folds = 10; // randomize data Instances randData = new Instances(data); Random rand = new Random(seed); randData.randomize(rand); Evaluation evalBase = getEvaluation(randData, model, folds); double accBase = evalBase.correct() / evalBase.numInstances() * 100; double nf = randData.numAttributes(); for (int j = 0; j < nf - 1; j++) { ret[j] = new Influence(); String str = randData.attribute(j).name(); Attribute att = randData.attribute(j); randData.deleteAttributeAt(j); Evaluation evalTemp = getEvaluation(randData, model, folds); double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100; double tempInfluence = accBase - accTemp; ret[j].attributeName = str; ret[j].infVal = tempInfluence; randData.insertAttributeAt(att, j); } sortInfluenceArray(ret); } catch (Exception ex) { Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex); } return ret; }
From source file:cezeri.feature.selection.FeatureSelectionRanker.java
/** * You should use this method only for the Classification problems Fisher is * not suitable for Regression. For Regression problems you can use CRCF * method./*from w w w . j av a 2 s .c o m*/ * * @param data * @param type * @return */ public static TFeatureRank[] fisherDistance(Instances data, int type) { if (type == TMachineLearning.REGRESSION) { return null; } TFeatureRank[] ret = new TFeatureRank[data.numAttributes() - 1]; String[] attributeNames = FactoryInstance.getAttributeList(data); // FactoryInstance.getMatrix(data).plot(); Instances[] ins = FactoryInstance.getSpecificInstancesBasedOnClassValue(data, FactoryInstance.getDefaultClasses(data)); if (ins.length < 2) { return null; } double[][] cl_1 = CMatrix.getInstance(FactoryInstance.getData(ins[0])).transpose().get2DArrayDouble(); double[][] cl_2 = CMatrix.getInstance(FactoryInstance.getData(ins[1])).transpose().get2DArrayDouble(); // FactoryMatrix.transpose(FactoryInstance.getData(ins[1])); double[] fisher = new double[cl_1.length]; for (int i = 0; i < cl_1.length - 1; i++) { double mean_1 = FactoryUtils.getMean(cl_1[i]); double std_1 = FactoryStatistic.getStandardDeviation(cl_1[i]); double mean_2 = FactoryUtils.getMean(cl_2[i]); double std_2 = FactoryStatistic.getStandardDeviation(cl_2[i]); if (Math.pow(std_1, 2) + Math.pow(std_2, 2) == 0.0) { fisher[i] = 0.0; } else { // double f = Math.abs(mean_1 - mean_2) / (Math.pow(std_1, 2) + Math.pow(std_2, 2)); double f = Math.pow((mean_1 - mean_2), 2) / (Math.pow(std_1, 2) + Math.pow(std_2, 2)); fisher[i] = FactoryUtils.formatDouble(f); } TFeatureRank obj = new TFeatureRank(); obj.featureName = attributeNames[i]; obj.index = "" + i; obj.value = fisher[i]; ret[i] = obj; //println(i + ".fisher distance:" + fisher[i]); } ArrayList<TFeatureRank> lst = toArrayList(ret); Collections.sort(lst, new CustomComparatorForFeatureRank()); ret = toArray(lst); // int[] fisherIndex = FactoryUtils.sortArrayAndReturnIndex(fisher, "desc"); return ret; }
From source file:cezeri.feature.selection.FeatureSelectionRanker.java
/** * if full exhaustive search is not feasible due to computational cost, * shrink search space by reducing the number of features that you want to * explore/*from w w w.java 2 s .co m*/ * * @param nSubset desired subset number i.e.: if you have 15 features you * may want to 9 feature subset result * @param data train or test data * @param model classifier you used * @param nFolds during learning what will be the cross validation folds * @param show_text print the output * @param show_plot plot the output * @return */ public static TFeatureRank[] wrapperExhaustiveSearchLimited(int nSubset, Instances data, Classifier model, int nFolds, boolean show_text, boolean show_plot) { if (nSubset > data.numAttributes() - 1) { System.out.println("subset should be less than attribute number"); return null; } String[] attributeNames = FactoryInstance.getAttributeListExceptClassAttribute(data); String[] lstComb = FactoryCombination.getCombination(attributeNames, nSubset); FactoryCombination.toString(lstComb); TFeatureRank[] ret = computeCombinationPairs(lstComb, data, model, nFolds, show_text, show_plot); return ret; }
From source file:cezeri.feature.selection.FeatureSelectionRanker.java
/**
 * Exhaustive wrapper search over all feature combinations. Only feasible for small
 * feature spaces: data sets with more than 15 attributes are rejected.
 *
 * @param data      dataset
 * @param model     classifier
 * @param nFolds    number of cross validation folds
 * @param show_text print the output
 * @param show_plot plot the output
 * @return the result of {@code computeCombinationPairs} over all combinations, or
 *         {@code null} when the data set has too many attributes
 */
public static TFeatureRank[] wrapperExhaustiveSearch(Instances data, Classifier model, int nFolds,
        boolean show_text, boolean show_plot) {
    // Single source of truth for the limit, so the check and the message cannot drift
    // apart (the message previously said 13 while the check used 15).
    final int maxAttributes = 15;
    if (data.numAttributes() > maxAttributes) {
        System.out.println("for exhaustive search num of attributes greater than " + maxAttributes
                + " is not feasible comp cost is too high to compute");
        return null;
    }
    String[] attributeNames = FactoryInstance.getAttributeListExceptClassAttribute(data);
    String[] lstComb = FactoryCombination.getAllCombinations(attributeNames);
    return computeCombinationPairs(lstComb, data, model, nFolds, show_text, show_plot);
}
From source file:cezeri.feature.selection.FeatureSelectionRanker.java
private static TFeatureRank[] correlation(Instances data, int type) { TFeatureRank[] ret = new TFeatureRank[data.numAttributes() - 1]; String[] attributeNames = FactoryInstance.getAttributeList(data); double[] out = data.attributeToDoubleArray(data.classIndex()); for (int i = 0; i < data.numAttributes() - 1; i++) { TFeatureRank obj = new TFeatureRank(); obj.featureName = attributeNames[i]; obj.index = i + ""; if (type == TCorelation.ARE) { obj.value = Math.abs(FactoryStatistic.ARE(data.attributeToDoubleArray(i), out)); }/* ww w . ja v a2s . com*/ if (type == TCorelation.CRCF) { obj.value = Math.abs(FactoryStatistic.CRCF(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.IOA) { obj.value = Math.abs(FactoryStatistic.IOA(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.KENDALL) { obj.value = Math.abs(FactoryStatistic.KENDALL(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.MAE) { obj.value = Math.abs(FactoryStatistic.MAE(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.MPE) { obj.value = Math.abs(FactoryStatistic.MPE(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.MSE) { obj.value = Math.abs(FactoryStatistic.MSE(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.NSEC) { obj.value = Math.abs(FactoryStatistic.NSEC(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.PEARSON) { obj.value = Math.abs(FactoryStatistic.PEARSON(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.R) { obj.value = Math.abs(FactoryStatistic.R(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.R2) { obj.value = Math.abs(FactoryStatistic.R2(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.RAE) { obj.value = Math.abs(FactoryStatistic.RAE(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.RELATIVE_NSEC) { obj.value = Math.abs(FactoryStatistic.RELATIVE_NSEC(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.RMSE) { obj.value = 
Math.abs(FactoryStatistic.RMSE(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.RRSE) { obj.value = Math.abs(FactoryStatistic.RRSE(data.attributeToDoubleArray(i), out)); } if (type == TCorelation.SPEARMAN) { obj.value = Math.abs(FactoryStatistic.SPEARMAN(data.attributeToDoubleArray(i), out)); } // if (type==FactoryCorrelation.KENDALL) { // obj.value=Math.abs(FactoryCorrelation.rankKendallTauBeta(data.attributeToDoubleArray(i), out)); // } // if (type==FactoryCorrelation.PEARSON) { // obj.value=Math.abs(FactoryCorrelation.pearson(data.attributeToDoubleArray(i), out)); // } // if (type==FactoryCorrelation.SPEARMAN) { // obj.value=Math.abs(FactoryCorrelation.spearman(data.attributeToDoubleArray(i), out)); // } ret[i] = obj; } ArrayList<TFeatureRank> lst = toArrayList(ret); Collections.sort(lst, new CustomComparatorForFeatureRank()); ret = toArray(lst); return ret; }
From source file:cezeri.utils.FactoryInstance.java
/**
 * Creates a data set with {@code nCols} attributes named {@code f1..fN} and fills it
 * from a 1 x {@code nCols} matrix obtained via {@code CMatrix.getInstance().zeros(1, nCols)}.
 * The last attribute is set as the class attribute.
 *
 * @param relationName name given to the created relation
 * @param nCols        number of attributes (columns) to create
 * @return the populated data set
 */
public static Instances generateInstances(String relationName, int nCols) {
    CMatrix cm = CMatrix.getInstance().zeros(1, nCols);
    final int columnCount = cm.getColumnNumber();
    final int rowCount = cm.getRowNumber();

    FastVector attributes = new FastVector();
    for (int c = 1; c <= columnCount; c++) {
        attributes.addElement(new Attribute("f" + c));
    }

    Instances ret = new Instances(relationName, attributes, rowCount);
    for (int r = 0; r < rowCount; r++) {
        double[] rowValues = cm.get2DArrayDouble()[r];
        Instance row = new Instance(columnCount);
        for (int c = 0; c < columnCount; c++) {
            row.setValue(c, rowValues[c]);
        }
        ret.add(row);
    }
    ret.setClassIndex(ret.numAttributes() - 1);
    return ret;
}