Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name) 

Source Link

Document

Returns an attribute given its name.
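
As a quick illustration of the prototype above, here is a minimal sketch (not taken from any of the listed source files) that loads a dataset and looks an attribute up by its name; the ARFF file name and the attribute name are assumptions for illustration only. Note that attribute(String) returns null when no attribute with that name exists.

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AttributeByNameExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset (the file name is an illustrative assumption)
        Instances data = new DataSource("iris.arff").getDataSet();

        // Look up an attribute by its name; returns null if the name is unknown
        Attribute classAtt = data.attribute("class");
        if (classAtt != null) {
            data.setClass(classAtt);
            System.out.println("Class attribute: " + classAtt.name());
        }
    }
}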

Usage

From source file:controller.MineroControler.java

public String clasificardorArbolAleat(String atributo) {
    BufferedReader breader = null;
    Instances datos = null;
    breader = new BufferedReader(fuente_arff);
    try {
        datos = new Instances(breader);
        Attribute atr = datos.attribute(atributo);
        datos.setClass(atr);
        //datos.setClassIndex(0);
    } catch (IOException ex) {
        System.err.println("Problemas al intentar cargar los datos");
        return null;
    }

    RandomTree arbol = new RandomTree(); // Class for constructing a tree that considers K randomly chosen attributes at each node. 

    try {

        arbol.setNumFolds(100);
        arbol.setKValue(0);
        arbol.setMinNum(1);
        arbol.setMaxDepth(0);
        arbol.setSeed(1);
        arbol.buildClassifier(datos);

    } catch (Exception ex) {
        System.err.println("Problemas al ejecutar algorimo de clasificacion" + ex.getLocalizedMessage());
    }
    return arbol.toString();
}

From source file:core.classifier.MyFirstClassifier.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper
         is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
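
A minimal usage sketch for the method above (an assumption, not part of the original source): load training data, set the class attribute by looking it up by name via Instances.attribute(String), then build the classifier. The file name "weather.nominal.arff" and the attribute name "play" are illustrative, and core.classifier.MyFirstClassifier is assumed to be on the classpath.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class MyFirstClassifierDemo {
    public static void main(String[] args) throws Exception {
        // File name and class-attribute name are illustrative assumptions
        Instances insts = new DataSource("weather.nominal.arff").getDataSet();
        insts.setClass(insts.attribute("play")); // class attribute looked up by name

        MyFirstClassifier classifier = new MyFirstClassifier();
        classifier.buildClassifier(insts);
        System.out.println(classifier);
    }
}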

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluates a clusterer with the options given in an array of
 * strings. It takes the string indicated by "-t" as training file, the
 * string indicated by "-T" as test file.
 * If the test file is missing, a stratified ten-fold
 * cross-validation is performed (distribution clusterers only).
 * Using "-x" you can change the number of
 * folds to be used, and using "-s" the random seed.
 * If the "-p" option is present it outputs the classification for
 * each test instance. If you provide the name of an object file using
 * "-l", a clusterer will be loaded from the given file. If you provide the
 * name of an object file using "-d", the clusterer built from the
 * training data will be saved to the given file.
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results 
 */
public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {

    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Random random;
    String trainFileName, testFileName, seedString, foldsString;
    String objectInputFileName, objectOutputFileName, attributeRangeString;
    String graphFileName;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {

        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);

        throw new Exception("Help requested." + makeOptionString(clusterer, globalInfo));
    }

    try {
        // Get basic options (options that are the same for all clusterers)
        //printClusterAssignments = Utils.getFlag('p', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        trainFileName = Utils.getOption('t', options);
        testFileName = Utils.getOption('T', options);
        graphFileName = Utils.getOption('g', options);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClusterAssignments = true;
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }

            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test file given.");
            }
        } else {
            if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
                throw new Exception("Can't use both train and model file " + "unless -p specified.");
            }
        }

        seedString = Utils.getOption('s', options);

        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }

        foldsString = Utils.getOption('x', options);

        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
            doXval = true;
        }
    } catch (Exception e) {
        throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer, false));
    }

    try {
        if (trainFileName.length() != 0) {
            source = new DataSource(trainFileName);
            train = source.getStructure();

            String classString = Utils.getOption('c', options);
            if (classString.length() != 0) {
                if (classString.compareTo("last") == 0)
                    theClass = train.numAttributes();
                else if (classString.compareTo("first") == 0)
                    theClass = 1;
                else
                    theClass = Integer.parseInt(classString);

                if (theClass != -1) {
                    if (doXval || testFileName.length() != 0)
                        throw new Exception("Can only do class based evaluation on the " + "training data");

                    if (objectInputFileName.length() != 0)
                        throw new Exception("Can't load a clusterer and do class based " + "evaluation");

                    if (objectOutputFileName.length() != 0)
                        throw new Exception("Can't do class based evaluation and save clusterer");
                }
            } else {
                // if the dataset defines a class attribute, use it
                if (train.classIndex() != -1) {
                    theClass = train.classIndex() + 1;
                    System.err
                            .println("Note: using class attribute from dataset, i.e., attribute #" + theClass);
                }
            }

            if (theClass != -1) {
                if (theClass < 1 || theClass > train.numAttributes())
                    throw new Exception("Class is out of range!");

                if (!train.attribute(theClass - 1).isNominal())
                    throw new Exception("Class must be nominal!");

                train.setClassIndex(theClass - 1);
            }
        }
    } catch (Exception e) {
        throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
        Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
        ((OptionHandler) clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
        // Load the clusterer from file
        //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(
                new java.io.BufferedInputStream(new java.io.FileInputStream(objectInputFileName)));
        clusterer = (Clusterer) ois.readObject();
        // try and get the training header
        try {
            trainHeader = (Instances) ois.readObject();
        } catch (Exception ex) {
            // don't complain if we can't
        }
    } else {
        // Build the clusterer if no object file provided
        if (theClass == -1) {
            if (updateable) {
                clusterer.buildClusterer(source.getStructure());
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    ((UpdateableClusterer) clusterer).updateClusterer(inst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                clusterer.buildClusterer(source.getDataSet());
            }
        } else {
            Remove removeClass = new Remove();
            removeClass.setAttributeIndices("" + theClass);
            removeClass.setInvertSelection(false);
            removeClass.setInputFormat(train);
            if (updateable) {
                Instances clusterTrain = Filter.useFilter(train, removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    removeClass.input(inst);
                    removeClass.batchFinished();
                    Instance clusterTrainInst = removeClass.output();
                    ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
            }
            ClusterEvaluationEX ce = new ClusterEvaluationEX();
            ce.setClusterer(clusterer);
            ce.evaluateClusterer(train, trainFileName);

            return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
        }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data) */
    if (printClusterAssignments) {
        return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append(
            "\n\n=== Clustering stats for training data ===\n\n" + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
        // check header compatibility
        DataSource test = new DataSource(testFileName);
        Instances testStructure = test.getStructure();
        if (!trainHeader.equalHeaders(testStructure)) {
            throw new Exception("Training and testing data are not compatible\n");
        }

        text.append("\n\n=== Clustering stats for testing data ===\n\n"
                + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0)
            && (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        random = new Random(seed);
        random.setSeed(seed);
        train = source.getDataSet();
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        //SerializationHelper.write(objectOutputFileName, clusterer);
        saveClusterer(objectOutputFileName, clusterer, trainHeader);
    }

    // If classifier is drawable output string describing graph
    if ((clusterer instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) clusterer).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return text.toString();
}
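
A minimal sketch of how this evaluator might be invoked (illustrative, not part of the original listing), using the option flags documented above. weka.clusterers.EM is a standard density-based clusterer, so the "-x" cross-validation path applies; the file name and option values are assumptions.

import weka.clusterers.EM;

public class ClusterEvaluationDemo {
    public static void main(String[] args) throws Exception {
        // "train.arff" and the option values are illustrative assumptions
        String[] options = { "-t", "train.arff", "-x", "10", "-s", "42" };
        System.out.println(ClusterEvaluationEX.evaluateClusterer(new EM(), options));
    }
}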

From source file:core.DatabaseSaverEx.java

License:Open Source License

/** 
 * Writes the structure (header information) to a database by creating a new table.
 *
 * @throws Exception if something goes wrong
 */
private void writeStructure() throws Exception {

    StringBuffer query = new StringBuffer();
    Instances structure = getInstances();
    query.append("CREATE TABLE ");
    if (m_tabName || m_tableName.equals(""))
        m_tableName = m_DataBaseConnection.maskKeyword(structure.relationName());
    if (m_DataBaseConnection.getUpperCase()) {
        m_tableName = m_tableName.toUpperCase();
        m_createInt = m_createInt.toUpperCase();
        m_createDouble = m_createDouble.toUpperCase();
        m_createText = m_createText.toUpperCase();
        m_createDate = m_createDate.toUpperCase();
    }
    m_tableName = m_tableName.replaceAll("[^\\w]", "_");
    m_tableName = m_DataBaseConnection.maskKeyword(m_tableName);
    query.append(m_tableName);
    if (structure.numAttributes() == 0)
        throw new Exception("Instances have no attribute.");
    query.append(" ( ");
    if (m_id) {
        if (m_DataBaseConnection.getUpperCase())
            m_idColumn = m_idColumn.toUpperCase();
        query.append(m_DataBaseConnection.maskKeyword(m_idColumn));
        query.append(" ");
        query.append(m_createInt);
        query.append(" PRIMARY KEY,");
    }
    for (int i = 0; i < structure.numAttributes(); i++) {
        Attribute att = structure.attribute(i);
        String attName = att.name();
        attName = attName.replaceAll("[^\\w]", "_");
        attName = m_DataBaseConnection.maskKeyword(attName);
        if (m_DataBaseConnection.getUpperCase())
            query.append(attName.toUpperCase());
        else
            query.append(attName);
        if (att.isDate())
            query.append(" " + m_createDate);
        else {
            if (att.isNumeric())
                query.append(" " + m_createDouble);
            else
                query.append(" " + m_createText);
        }
        if (i != structure.numAttributes() - 1)
            query.append(", ");
    }
    query.append(" )");
    //System.out.println(query.toString());
    m_DataBaseConnection.update(query.toString());
    m_DataBaseConnection.close();
    if (!m_DataBaseConnection.tableExists(m_tableName)) {
        throw new IOException("Table cannot be built.");
    }
}

From source file:core.me.Context.java

License:Open Source License

public Instance buildSCAInstance() {
    Instances dataStruc = Classifiers.getInst(false).getDataStructSCA();

    double[] values = new double[2];
    values[0] = this.theSymbol.getRatio();
    values[1] = dataStruc.attribute(1).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();
    return inst;
}

From source file:core.me.Context.java

License:Open Source License

public Instance buildSCBInstance() {
    Instances dataStruc = Classifiers.getInst(false).getDataStructSCB();

    double H = 0, D = 0, DX = 0.;
    int parentClass = 1;

    if (this.getParent() != null) {
        H = this.getH();
        D = this.getD();
        DX = this.getDX();
        parentClass = this.getParentSymbol().getSymbolClass();
    }

    double[] values = new double[5];
    values[0] = H;
    values[1] = D;
    values[2] = DX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(parentClass));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;
}

From source file:core.me.Context.java

License:Open Source License

public Instance buildSCC1Instance() {
    Instances dataStruc = Classifiers.getInst(false).getDataStructSCC(Relationship.INLINE);

    double LH, LD, LDX;
    int LCLASS;
    double height = (double) this.theSymbol.getHeight();

    LH = height / ((double) this.horSymbol.getHeight());
    LD = (this.theSymbol.getCenter() - this.horSymbol.getCenter()) / height;
    LDX = this.getHor().getDX();
    LCLASS = this.horSymbol.getSymbolClass();

    double[] values = new double[5];
    values[0] = LH;
    values[1] = LD;
    values[2] = LDX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(LCLASS));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();
    return inst;

}

From source file:core.me.Context.java

License:Open Source License

public Instance buildSCC2Instance() {
    Instances dataStruc = Classifiers.getInst(false).getDataStructSCC(Relationship.SUPERSCRIPT);

    double EH, ED, EDX;
    int ECLASS;
    double height = (double) this.theSymbol.getHeight();

    EH = height / ((double) this.supSymbol.getHeight());
    ED = (this.theSymbol.getCenter() - this.supSymbol.getCenter()) / height;
    EDX = this.getSup().getDX();
    ECLASS = this.supSymbol.getSymbolClass();

    double[] values = new double[5];
    values[0] = EH;
    values[1] = ED;
    values[2] = EDX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(ECLASS));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();
    return inst;

}

From source file:core.me.Context.java

License:Open Source License

public Instance buildSCC3Instance() {
    Instances dataStruc = Classifiers.getInst(false).getDataStructSCC(Relationship.SUBSCRIPT);

    double SH, SD, SDX;
    int SCLASS;
    double height = (double) this.theSymbol.getHeight();

    SH = height / ((double) this.subSymbol.getHeight());
    SD = (this.theSymbol.getCenter() - this.subSymbol.getCenter()) / height;
    SDX = this.getSub().getDX();
    SCLASS = this.subSymbol.getSymbolClass();

    double[] values = new double[5];
    values[0] = SH;
    values[1] = SD;
    values[2] = SDX;
    values[3] = dataStruc.attribute(3).indexOfValue(Integer.toString(SCLASS));
    values[4] = dataStruc.attribute(4).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();
    return inst;

}

From source file:core.me.Context.java

License:Open Source License

public Instance buildRCInstance() {

    Instances dataStruc = Classifiers.getInst(false).getDataStructRC();

    double H = 0, D = 0, DX = 0.;
    int parentClass = 1;

    if (this.getParent() != null) {
        H = ((double) this.getParentSymbol().getHeight()) / this.theSymbol.getHeight();
        D = (this.getParentSymbol().getCenter() - this.theSymbol.getCenter())
                / this.getParentSymbol().getHeight();
        DX = this.getDX();
        parentClass = this.getParentSymbol().getSymbolClass();
    }

    double[] values = new double[6];
    values[0] = H;
    values[1] = D;
    values[2] = DX;
    values[3] = dataStruc.attribute(3).indexOfValue("" + this.theClass.get());
    values[4] = dataStruc.attribute(4).indexOfValue("" + parentClass);
    values[5] = dataStruc.attribute(5).indexOfValue("0");

    Instance inst = new Instance(1.0, values);
    inst.setDataset(dataStruc);
    inst.setClassMissing();

    return inst;

}