List of usage examples for weka.core.Instances.checkForStringAttributes()
public boolean checkForStringAttributes()
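checkForStringAttributes() returns true if the dataset contains at least one string attribute; the examples below use it as a guard before training, since most Weka learners reject string attributes. Before the full examples, here is a minimal sketch of that guard pattern, assuming Weka is on the classpath and using a hypothetical iris.arff file:

import weka.core.Instances;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.converters.ConverterUtils.DataSource;

public class CheckForStringAttributesExample {

    public static void main(String[] args) throws Exception {
        // Hypothetical ARFF path; replace with your own dataset.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // true if any attribute in the dataset is of type string
        if (data.checkForStringAttributes()) {
            // Mirror the guard used in the examples below.
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }

        System.out.println("No string attributes found; safe to train.");
    }
}

In newer Weka releases the same check can also be expressed as checkForAttributeType(Attribute.STRING).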
From source file: Pair.java
License: Open Source License
/**
 * Boosting method.
 *
 * @param data the training data to be used for generating the boosted classifier.
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    super.buildClassifier(data);
    if (data.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    if (data.numInstances() == 0) {
        throw new Exception("No train instances without class missing!");
    }
    if (!data.classAttribute().isNumeric()) {
        throw new UnsupportedClassTypeException("TrAdaBoostR2 can only handle a numeric class!");
    }
    if (m_SourceInstances == null) {
        throw new Exception("Source data has not been specified!");
    }
    m_NumClasses = data.numClasses();
    try {
        doCV(data);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: motaz.CODB.java
License: Open Source License
/**
 * Prepares the data: removes instances with missing values or replaces missing values
 * (if specified) and performs normalization for numeric attributes.
 *
 * @param instances the instances to be checked for class outliers
 * @throws java.lang.Exception if clustering was not successful
 */
public void buildCODB(Instances instances) throws Exception {
    if (instances.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }

    NumericToNominal convert = new NumericToNominal();
    String[] options = new String[2];
    options[0] = "-R";
    options[1] = "3"; // range of attributes to make nominal
    convert.setOptions(options);
    convert.setInputFormat(instances);
    newData = Filter.useFilter(instances, convert);
    newData.setClassIndex(3);
    Instances filteredInstances = newData;
    System.out.println(newData.attribute(2).isNominal());

    // Wrap every instance in a DataObject and insert it into the database
    database = databaseForName(getDatabase_Type(), filteredInstances);
    for (int i = 0; i < database.getInstances().numInstances(); i++) {
        DataObject dataObject = dataObjectForName(getDatabase_distanceType(),
                database.getInstances().instance(i), Integer.toString(i), database);
        database.insert(dataObject);
    }
    pk_list = new double[database.size()];

    // Get the current maximum primary key from the outlier table
    ResultSet rs = null;
    PreparedStatement preparedStatement = null;
    String query = "select max(pk) as ind from geo_osfpm.geo_osfpm_outlier";
    preparedStatement = PostgreSQLlocal.PostgreSQLlocal().prepareStatement(query);
    rs = preparedStatement.executeQuery();
    rs.next();
    int pk = rs.getInt("ind");
    System.out.println(database.size());

    // Assign consecutive primary keys to the data objects
    for (int i = 0; i < database.size(); i++) {
        pk_list[i] = pk + i + 1;
    }

    setTopN(database.size() / 5);
    database.setMinMaxValues();
    System.out.println("Inserted Values");
}
From source file: smo2.SMO.java
License: Open Source License
/**
 * Method for building the classifier. Implements a one-against-one wrapper
 * for multi-class problems.
 *
 * @param insts the set of training instances
 * @exception Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        if (insts.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }
        if (insts.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException(
                    "mySMO can't handle a numeric class! Use " + "SMOreg for performing regression.");
        }
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        if (insts.numInstances() == 0) {
            throw new Exception("No training instances without a missing class!");
        }

        /*
         * Removes all the instances with weight equal to 0. MUST be done
         * since condition (8) of Keerthi's paper is made with the assertion
         * Ci > 0 (see equation (3a)).
         */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0) {
                data.add(insts.instance(i));
            }
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instance with either a weight null or a missing class!");
        }
        insts = data;
    }

    // Determine whether all predictor attributes are numeric
    m_onlyNumeric = true;
    if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
            if (i != insts.classIndex()) {
                if (!insts.attribute(i).isNumeric()) {
                    m_onlyNumeric = false;
                    break;
                }
            }
        }
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (!m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build one binary classifier for every pair of classes
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarymySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarymySMO();
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}