Example usage for weka.core Instances checkForStringAttributes

List of usage examples for weka.core Instances checkForStringAttributes

Introduction

In this page you can find the example usage for weka.core Instances checkForStringAttributes.

Prototype

public boolean checkForStringAttributes()

Source Link

Document

Checks for string attributes in the dataset

Usage

From source file:Pair.java

License:Open Source License

/**
 * Boosting method.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @exception Exception if the classifier could not be built successfully
 */

public void buildClassifier(Instances data) throws Exception {

    super.buildClassifier(data);

    // String attributes are rejected outright.
    if (data.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }

    // Copy the dataset before dropping instances whose class value is missing.
    data = new Instances(data);
    data.deleteWithMissingClass();
    if (data.numInstances() == 0) {
        throw new Exception("No train instances without class missing!");
    }
    if (!data.classAttribute().isNumeric()) {
        throw new UnsupportedClassTypeException("TrAdaBoostR2 can only handle a numeric class!");
    }
    if (m_SourceInstances == null) {
        throw new Exception("Source data has not been specified!");
    }

    m_NumClasses = data.numClasses();

    // Failures in doCV propagate to the caller. Catching and printing them
    // here would make this method return normally even though building
    // failed, contradicting the declared "throws Exception" contract.
    doCV(data);
}

From source file:motaz.CODB.java

License:Open Source License

/**
 * Prepares the data: removes instances with missing values or replaces missing values (if specified)
 * and performs normalization for numeric attributes.
 * @param instances The instances that need to be detected for class outliers
 * @throws java.lang.Exception If clustering was not successful
 */
public void buildCODB(Instances instances) throws Exception {

    if (instances.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }

    // Run the NumericToNominal filter with the option pair "-R 3".
    NumericToNominal convert = new NumericToNominal();
    String[] options = new String[2];
    options[0] = "-R";
    options[1] = "3"; //range of variables to make nominal

    convert.setOptions(options);
    convert.setInputFormat(instances);

    newData = Filter.useFilter(instances, convert);
    newData.setClassIndex(3);
    Instances filteredInstances = newData;
    System.out.println(newData.attribute(2).isNominal());

    // Wrap every filtered instance in a DataObject keyed by its row index
    // and insert it into the freshly created database.
    database = databaseForName(getDatabase_Type(), filteredInstances);
    for (int i = 0; i < database.getInstances().numInstances(); i++) {
        DataObject dataObject = dataObjectForName(getDatabase_distanceType(),
                database.getInstances().instance(i), Integer.toString(i), database);
        database.insert(dataObject);
    }
    pk_list = new double[database.size()];

    // Fetch the current maximum primary key. The statement and result set
    // are closed even when the query fails; the connection is left open
    // because its ownership is not visible from this method.
    String query = "select max(pk) as ind from geo_osfpm.geo_osfpm_outlier";
    int pk;
    try (PreparedStatement preparedStatement = PostgreSQLlocal.PostgreSQLlocal().prepareStatement(query);
            ResultSet rs = preparedStatement.executeQuery()) {
        // getInt yields 0 for SQL NULL; use the same value if there is no row.
        pk = rs.next() ? rs.getInt("ind") : 0;
    }

    System.out.println(database.size());
    // Assign consecutive keys following the current maximum.
    for (int i = 0; i < database.size(); i++) {
        pk_list[i] = pk + i + 1;
    }
    setTopN(database.size() / 5);
    database.setMinMaxValues();
    System.out.println("Inserted Values");
}

From source file:smo2.SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one wrapper
 * for multi-class problems.
 *
 * @param insts
 *            the set of training instances
 * @exception Exception
 *                if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (m_checksTurnedOff) {
        // With checks disabled the data is used as given: no validation,
        // no missing-value replacement, and attributes are taken to be numeric.
        m_onlyNumeric = true;
        m_Missing = null;
    } else {
        if (insts.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }
        if (insts.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException(
                    "mySMO can't handle a numeric class! Use SMOreg for performing regression.");
        }
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        if (insts.numInstances() == 0) {
            throw new Exception("No training instances without a missing class!");
        }

        /*
         * Removes all the instances with weight equal to 0. MUST be done
         * since condition (8) of Keerthi's paper is made with the assertion
         * Ci > 0 (See equation (3a)).
         */
        Instances weighted = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0) {
                weighted.add(insts.instance(i));
            }
        }
        if (weighted.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instance with either a weight null or a missing class!");
        }
        insts = weighted;

        // Record whether every non-class attribute is numeric; this decides
        // below whether the NominalToBinary filter is needed.
        m_onlyNumeric = true;
        for (int i = 0; i < insts.numAttributes(); i++) {
            if (i != insts.classIndex() && !insts.attribute(i).isNumeric()) {
                m_onlyNumeric = false;
                break;
            }
        }

        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    }

    if (!m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    // Optional rescaling filter, selected by m_filterType.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers: one per unordered class pair (i < j),
    // each trained on the shuffled union of the two class subsets.
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarymySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarymySMO();
            Instances pair = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                pair.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                pair.add(subsets[j].instance(k));
            }
            pair.compactify();
            pair.randomize(rand);
            m_classifiers[i][j].buildClassifier(pair, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}