List of usage examples for weka.core.Instances attribute
public Attribute attribute(String name)
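Instances.attribute(String name) returns the Attribute with the given name, or null if no attribute of that name exists; the attribute(int index) overload returns the attribute at a given position. Before the examples below, here is a minimal sketch of both lookups; the file path "weather.arff" and the attribute name "outlook" are placeholders, and only standard weka.core calls are used:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instances;

public class AttributeLookupSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file (path is a placeholder).
        Instances data = new Instances(new BufferedReader(new FileReader("weather.arff")));

        // Look up an attribute by name; attribute(String) returns null if the name is unknown.
        Attribute byName = data.attribute("outlook");
        if (byName != null) {
            data.setClass(byName); // use it as the class attribute
        }

        // Look up an attribute by position.
        Attribute byIndex = data.attribute(0);
        System.out.println(byIndex.name() + " nominal=" + byIndex.isNominal());
    }
}

Checking the return value of attribute(String) for null avoids a NullPointerException when the name does not match any column header.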
From source file:controller.MineroControler.java
public String clasificardorArbolAleat(String atributo) {
    BufferedReader breader = null;
    Instances datos = null;
    breader = new BufferedReader(fuente_arff);
    try {
        datos = new Instances(breader);
        Attribute atr = datos.attribute(atributo);
        datos.setClass(atr);
        //datos.setClassIndex(0);
    } catch (IOException ex) {
        System.err.println("Problemas al intentar cargar los datos");
        return null;
    }

    RandomTree arbol = new RandomTree(); // Class for constructing a tree that considers K randomly chosen attributes at each node.
    try {
        arbol.setNumFolds(100);
        arbol.setKValue(0);
        arbol.setMinNum(1);
        arbol.setMaxDepth(0);
        arbol.setSeed(1);
        arbol.buildClassifier(datos);
    } catch (Exception ex) {
        System.err.println("Problemas al ejecutar algoritmo de clasificacion " + ex.getLocalizedMessage());
    }
    return arbol.toString();
}
From source file:core.classifier.MyFirstClassifier.java
License:Open Source License
/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file:core.ClusterEvaluationEX.java
License:Open Source License
/**
 * Evaluates a clusterer with the options given in an array of
 * strings. It takes the string indicated by "-t" as training file, the
 * string indicated by "-T" as test file.
 * If the test file is missing, a stratified ten-fold
 * cross-validation is performed (distribution clusterers only).
 * Using "-x" you can change the number of
 * folds to be used, and using "-s" the random seed.
 * If the "-p" option is present it outputs the classification for
 * each test instance. If you provide the name of an object file using
 * "-l", a clusterer will be loaded from the given file. If you provide the
 * name of an object file using "-d", the clusterer built from the
 * training data will be saved to the given file.
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results
 */
public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {

    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Random random;
    String trainFileName, testFileName, seedString, foldsString;
    String objectInputFileName, objectOutputFileName, attributeRangeString;
    String graphFileName;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {
        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);
        throw new Exception("Help requested." + makeOptionString(clusterer, globalInfo));
    }

    try {
        // Get basic options (options the same for all clusterers
        //printClusterAssignments = Utils.getFlag('p', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        trainFileName = Utils.getOption('t', options);
        testFileName = Utils.getOption('T', options);
        graphFileName = Utils.getOption('g', options);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClusterAssignments = true;
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test file given.");
            }
        } else {
            if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
                throw new Exception("Can't use both train and model file " + "unless -p specified.");
            }
        }

        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }

        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
            doXval = true;
        }
    } catch (Exception e) {
        throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer, false));
    }

    try {
        if (trainFileName.length() != 0) {
            source = new DataSource(trainFileName);
            train = source.getStructure();

            String classString = Utils.getOption('c', options);
            if (classString.length() != 0) {
                if (classString.compareTo("last") == 0)
                    theClass = train.numAttributes();
                else if (classString.compareTo("first") == 0)
                    theClass = 1;
                else
                    theClass = Integer.parseInt(classString);

                if (theClass != -1) {
                    if (doXval || testFileName.length() != 0)
                        throw new Exception("Can only do class based evaluation on the " + "training data");
                    if (objectInputFileName.length() != 0)
                        throw new Exception("Can't load a clusterer and do class based " + "evaluation");
                    if (objectOutputFileName.length() != 0)
                        throw new Exception("Can't do class based evaluation and save clusterer");
                }
            } else {
                // if the dataset defines a class attribute, use it
                if (train.classIndex() != -1) {
                    theClass = train.classIndex() + 1;
                    System.err.println("Note: using class attribute from dataset, i.e., attribute #" + theClass);
                }
            }

            if (theClass != -1) {
                if (theClass < 1 || theClass > train.numAttributes())
                    throw new Exception("Class is out of range!");
                if (!train.attribute(theClass - 1).isNominal())
                    throw new Exception("Class must be nominal!");
                train.setClassIndex(theClass - 1);
            }
        }
    } catch (Exception e) {
        throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
        Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
        ((OptionHandler) clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
        // Load the clusterer from file
        // clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(
                new java.io.BufferedInputStream(new java.io.FileInputStream(objectInputFileName)));
        clusterer = (Clusterer) ois.readObject();
        // try and get the training header
        try {
            trainHeader = (Instances) ois.readObject();
        } catch (Exception ex) {
            // don't moan if we cant
        }
    } else {
        // Build the clusterer if no object file provided
        if (theClass == -1) {
            if (updateable) {
                clusterer.buildClusterer(source.getStructure());
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    ((UpdateableClusterer) clusterer).updateClusterer(inst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                clusterer.buildClusterer(source.getDataSet());
            }
        } else {
            Remove removeClass = new Remove();
            removeClass.setAttributeIndices("" + theClass);
            removeClass.setInvertSelection(false);
            removeClass.setInputFormat(train);
            if (updateable) {
                Instances clusterTrain = Filter.useFilter(train, removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    removeClass.input(inst);
                    removeClass.batchFinished();
                    Instance clusterTrainInst = removeClass.output();
                    ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
            }
            ClusterEvaluationEX ce = new ClusterEvaluationEX();
            ce.setClusterer(clusterer);
            ce.evaluateClusterer(train, trainFileName);

            return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
        }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data */
    if (printClusterAssignments) {
        return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append("\n\n=== Clustering stats for training data ===\n\n" + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
        // check header compatibility
        DataSource test = new DataSource(testFileName);
        Instances testStructure = test.getStructure();
        if (!trainHeader.equalHeaders(testStructure)) {
            throw new Exception("Training and testing data are not compatible\n");
        }

        text.append("\n\n=== Clustering stats for testing data ===\n\n" + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0)
            && (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        random = new Random(seed);
        random.setSeed(seed);
        train = source.getDataSet();
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        //SerializationHelper.write(objectOutputFileName, clusterer);
        saveClusterer(objectOutputFileName, clusterer, trainHeader);
    }

    // If classifier is drawable output string describing graph
    if ((clusterer instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) clusterer).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return text.toString();
}
From source file:core.DatabaseSaverEx.java
License:Open Source License
/**
 * Writes the structure (header information) to a database by creating a new table.
 *
 * @throws Exception if something goes wrong
 */
private void writeStructure() throws Exception {

    StringBuffer query = new StringBuffer();
    Instances structure = getInstances();
    query.append("CREATE TABLE ");

    if (m_tabName || m_tableName.equals(""))
        m_tableName = m_DataBaseConnection.maskKeyword(structure.relationName());
    if (m_DataBaseConnection.getUpperCase()) {
        m_tableName = m_tableName.toUpperCase();
        m_createInt = m_createInt.toUpperCase();
        m_createDouble = m_createDouble.toUpperCase();
        m_createText = m_createText.toUpperCase();
        m_createDate = m_createDate.toUpperCase();
    }
    m_tableName = m_tableName.replaceAll("[^\\w]", "_");
    m_tableName = m_DataBaseConnection.maskKeyword(m_tableName);
    query.append(m_tableName);

    if (structure.numAttributes() == 0)
        throw new Exception("Instances have no attribute.");
    query.append(" ( ");

    if (m_id) {
        if (m_DataBaseConnection.getUpperCase())
            m_idColumn = m_idColumn.toUpperCase();
        query.append(m_DataBaseConnection.maskKeyword(m_idColumn));
        query.append(" ");
        query.append(m_createInt);
        query.append(" PRIMARY KEY,");
    }

    for (int i = 0; i < structure.numAttributes(); i++) {
        Attribute att = structure.attribute(i);
        String attName = att.name();
        attName = attName.replaceAll("[^\\w]", "_");
        attName = m_DataBaseConnection.maskKeyword(attName);
        if (m_DataBaseConnection.getUpperCase())
            query.append(attName.toUpperCase());
        else
            query.append(attName);
        if (att.isDate())
            query.append(" " + m_createDate);
        else {
            if (att.isNumeric())
                query.append(" " + m_createDouble);
            else
                query.append(" " + m_createText);
        }
        if (i != structure.numAttributes() - 1)
            query.append(", ");
    }
    query.append(" )");

    //System.out.println(query.toString());
    m_DataBaseConnection.update(query.toString());
    m_DataBaseConnection.close();

    if (!m_DataBaseConnection.tableExists(m_tableName)) {
        throw new IOException("Table cannot be built.");
    }
}
From source file:core.me.Context.java
License:Open Source License
public Instance buildSCAInstance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructSCA();

    double[] values = new double[2];
    values[0] = this.theSymbol.getRatio();
    values[1] = dataStruc.attribute(1).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}
From source file:core.me.Context.java
License:Open Source License
public Instance buildSCBInstance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructSCB();

    double H = 0, D = 0, DX = 0.;
    int parentClass = 1;
    if (this.getParent() != null) {
        H = this.getH();
        D = this.getD();
        DX = this.getDX();
        parentClass = this.getParentSymbol().getSymbolClass();
    }

    double[] values = new double[5];
    values[0] = H;
    values[1] = D;
    values[2] = DX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(parentClass));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}
From source file:core.me.Context.java
License:Open Source License
public Instance buildSCC1Instance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructSCC(Relationship.INLINE);

    double LH, LD, LDX;
    int LCLASS;
    double height = (double) this.theSymbol.getHeight();
    LH = height / ((double) this.horSymbol.getHeight());
    LD = (this.theSymbol.getCenter() - this.horSymbol.getCenter()) / height;
    LDX = this.getHor().getDX();
    LCLASS = this.horSymbol.getSymbolClass();

    double[] values = new double[5];
    values[0] = LH;
    values[1] = LD;
    values[2] = LDX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(LCLASS));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}
From source file:core.me.Context.java
License:Open Source License
public Instance buildSCC2Instance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructSCC(Relationship.SUPERSCRIPT);

    double EH, ED, EDX;
    int ECLASS;
    double height = (double) this.theSymbol.getHeight();
    EH = height / ((double) this.supSymbol.getHeight());
    ED = (this.theSymbol.getCenter() - this.supSymbol.getCenter()) / height;
    EDX = this.getSup().getDX();
    ECLASS = this.supSymbol.getSymbolClass();

    double[] values = new double[5];
    values[0] = EH;
    values[1] = ED;
    values[2] = EDX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(ECLASS));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}
From source file:core.me.Context.java
License:Open Source License
public Instance buildSCC3Instance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructSCC(Relationship.SUBSCRIPT);

    double SH, SD, SDX;
    int SCLASS;
    double height = (double) this.theSymbol.getHeight();
    SH = height / ((double) this.subSymbol.getHeight());
    SD = (this.theSymbol.getCenter() - this.subSymbol.getCenter()) / height;
    SDX = this.getSub().getDX();
    SCLASS = this.subSymbol.getSymbolClass();

    double[] values = new double[5];
    values[0] = SH;
    values[1] = SD;
    values[2] = SDX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(SCLASS));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}
From source file:core.me.Context.java
License:Open Source License
public Instance buildRCInstance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructRC();

    double H = 0, D = 0, DX = 0.;
    int parentClass = 1;
    if (this.getParent() != null) {
        H = ((double) this.getParentSymbol().getHeight()) / this.theSymbol.getHeight();
        D = (this.getParentSymbol().getCenter() - this.theSymbol.getCenter())
                / this.getParentSymbol().getHeight();
        DX = this.getDX();
        parentClass = this.getParentSymbol().getSymbolClass();
    }

    double[] values = new double[6];
    values[0] = H;
    values[1] = D;
    values[2] = DX;
    values[3] = dataStruc.attribute(3).indexOfValue("" + this.theClass.get());
    values[4] = dataStruc.attribute(4).indexOfValue("" + parentClass);
    values[5] = dataStruc.attribute(5).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}