Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

In this page you can find the example usage for weka.core Instances numAttributes.

Prototype


public int numAttributes() 

Source Link

Document

Returns the number of attributes.

Usage

From source file:Controller.CtlDataMining.java

public String mineria(String mensaje) {

    // El mensaje tipo String lo convertimos a un StringReader
    StringReader sr = new StringReader(mensaje);
    // el StringReader lo convertimos a un Buffer
    BufferedReader br = new BufferedReader(sr);

    try {/*from  ww  w .  j a  v a  2 s.  c  o m*/
        //definimos un objeto que contendra los datos a clasificar
        Instances data = new Instances(br);
        //Seleccionamos cual sera el atributo clase                
        data.setClassIndex(data.numAttributes() - 1);
        //cerramos el objeto buffer
        br.close();

        /*Obtenemos resultados*/
        String descripcion = definirEncabezado(data);
        String resultadoBayesiano = redBayesiana(data);
        String resultadoJ48 = arbolJ48(data);
        String resultadoApriori = apriori(data);

        /*Se concatenan resultados y se envian*/
        String res = descripcion + "\n" + resultadoBayesiano + "\n" + resultadoJ48 + "\n" + resultadoApriori;//se le aade el contenido al objeto de tipo mensaje            

        return res;

    } catch (Exception e) {
        return "El error es" + e.getMessage();

    }
}

From source file:controller.DecisionTreeServlet.java

@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    // Builds a J48 decision tree from an uploaded file, evaluates it on a
    // held-out test split, writes the model + summary to disk and redirects
    // to the result view with the confusion-matrix counters.
    request.setCharacterEncoding("UTF-8");
    String dir = "/data/";
    String path = getServletContext().getRealPath(dir);

    String action = request.getParameter("action");

    switch (action) {
    case "create": {
        String fileName = request.getParameter("file");

        // NOTE(review): assumes fileName contains a '.' — indexOf returns -1
        // otherwise and substring would throw; confirm upstream validation.
        String aux = fileName.substring(0, fileName.indexOf("."));
        String pathInput = path + "/" + request.getParameter("file");
        String pathTrainingOutput = path + "/" + aux + "-training-arff.txt";
        String pathTestOutput = path + "/" + aux + "-test-arff.txt";
        String pathDecisionTree = path + "/" + aux + "-decisionTree.txt";

        String name = request.getParameter("name");
        int range = Integer.parseInt(request.getParameter("range"));

        // Collect the selected columns; the form may skip indices, so a
        // separate counter tracks how many were actually provided.
        int size = Integer.parseInt(request.getParameter("counter"));
        String[] columns = new String[size];
        String[] types = new String[size];
        int[] positions = new int[size];
        int counter = 0;
        for (int i = 0; i < size; i++) {
            if (request.getParameter("column-" + (i + 1)) != null) {
                columns[counter] = request.getParameter("column-" + (i + 1));
                types[counter] = request.getParameter("type-" + (i + 1));
                positions[counter] = Integer.parseInt(request.getParameter("position-" + (i + 1)));
                counter++;
            }
        }

        FormatFiles.convertTxtToArff(pathInput, pathTrainingOutput, pathTestOutput, name, columns, types,
                positions, counter, range);
        try {
            J48 j48 = new J48();

            // try-with-resources: the original leaked both readers whenever
            // Instances parsing or classification threw.
            Instances instancesTraining;
            try (BufferedReader readerTraining = new BufferedReader(new FileReader(pathTrainingOutput))) {
                instancesTraining = new Instances(readerTraining);
            }
            instancesTraining.setClassIndex(instancesTraining.numAttributes() - 1);

            j48.buildClassifier(instancesTraining);

            Instances instancesTest;
            try (BufferedReader readerTest = new BufferedReader(new FileReader(pathTestOutput))) {
                instancesTest = new Instances(readerTest);
            }
            instancesTest.setClassIndex(instancesTest.numAttributes() - 1);

            // Manual confusion-matrix counters; the comparisons assume a
            // binary class encoded as 0/1.
            int corrects = 0;
            int truePositive = 0;
            int trueNegative = 0;
            int falsePositive = 0;
            int falseNegative = 0;

            for (int i = 0; i < instancesTest.size(); i++) {
                Instance instance = instancesTest.get(i);
                double correctValue = instance.value(instance.attribute(instancesTest.numAttributes() - 1));
                double classification = j48.classifyInstance(instance);

                if (correctValue == classification) {
                    corrects++;
                }
                // The four outcomes are mutually exclusive, so chain them.
                if (correctValue == 1 && classification == 1) {
                    truePositive++;
                } else if (correctValue == 1 && classification == 0) {
                    falseNegative++;
                } else if (correctValue == 0 && classification == 1) {
                    falsePositive++;
                } else if (correctValue == 0 && classification == 0) {
                    trueNegative++;
                }
            }

            Evaluation eval = new Evaluation(instancesTraining);
            eval.evaluateModel(j48, instancesTest);

            // try-with-resources closes the writer even if a println fails.
            try (PrintWriter writer = new PrintWriter(
                    new BufferedWriter(new FileWriter(pathDecisionTree, false)))) {
                writer.println(j48.toString());

                writer.println("");
                writer.println("");
                writer.println("Results");
                writer.println(eval.toSummaryString());
            }

            response.sendRedirect("DecisionTree?action=view&corrects=" + corrects + "&totalTest="
                    + instancesTest.size() + "&totalTrainig=" + instancesTraining.size() + "&truePositive="
                    + truePositive + "&trueNegative=" + trueNegative + "&falsePositive=" + falsePositive
                    + "&falseNegative=" + falseNegative + "&fileName=" + aux + "-decisionTree.txt");
        } catch (Exception e) {
            System.out.println(e.getMessage());
            response.sendRedirect("Navigation?action=decisionTree");
        }

        break;
    }
    default:
        response.sendError(404);
    }
}

From source file:controller.MineroControler.java

/**
 * Fits a linear regression model on the ARFF data read from
 * {@code fuente_arff} and stores the prediction for the last instance via
 * {@code setValorCalculado}.
 *
 * @return the textual representation of the (possibly untrained) model
 */
public String regresionLineal() {
    Instances datos = null;
    BufferedReader breader = new BufferedReader(fuente_arff);
    try {
        datos = new Instances(breader);
        // The last attribute is the class (target) attribute.
        datos.setClassIndex(datos.numAttributes() - 1);
    } catch (IOException ex) {
        System.err.println("Problemas al intentar cargar los datos");
    }

    LinearRegression regresionL = new LinearRegression();
    try {

        regresionL.buildClassifier(datos);

        // Predict the target value of the last loaded instance.
        Instance nuevaCal = datos.lastInstance();
        double calif = regresionL.classifyInstance(nuevaCal);

        // Double.valueOf replaces the deprecated Double(double) constructor.
        setValorCalculado(Double.valueOf(calif));

    } catch (Exception ex) {
        // Also reached when loading failed above (datos stays null).
        System.err.println("Problemas al clasificar instancia");
    }

    return regresionL.toString();
}

From source file:controller.NaiveBayesServlet.java

@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    // Trains a Naive Bayes classifier from an uploaded file, evaluates it on
    // a held-out test split, writes the model + summary to disk and redirects
    // to the result view with the confusion-matrix counters.
    request.setCharacterEncoding("UTF-8");
    String dir = "/data/";
    String path = getServletContext().getRealPath(dir);

    String action = request.getParameter("action");

    switch (action) {
    case "create": {
        String fileName = request.getParameter("file");

        // NOTE(review): assumes fileName contains a '.' — indexOf returns -1
        // otherwise and substring would throw; confirm upstream validation.
        String aux = fileName.substring(0, fileName.indexOf("."));
        String pathInput = path + "/" + request.getParameter("file");
        String pathTrainingOutput = path + "/" + aux + "-training-arff.txt";
        String pathTestOutput = path + "/" + aux + "-test-arff.txt";
        String pathNaivebayes = path + "/" + aux + "-naiveBayes.txt";

        String name = request.getParameter("name");
        int range = Integer.parseInt(request.getParameter("range"));

        // Collect the selected columns; the form may skip indices, so a
        // separate counter tracks how many were actually provided.
        int size = Integer.parseInt(request.getParameter("counter"));
        String[] columns = new String[size];
        String[] types = new String[size];
        int[] positions = new int[size];
        int counter = 0;

        for (int i = 0; i < size; i++) {
            if (request.getParameter("column-" + (i + 1)) != null) {
                columns[counter] = request.getParameter("column-" + (i + 1));
                types[counter] = request.getParameter("type-" + (i + 1));
                positions[counter] = Integer.parseInt(request.getParameter("position-" + (i + 1)));
                counter++;
            }
        }

        FormatFiles.convertTxtToArff(pathInput, pathTrainingOutput, pathTestOutput, name, columns, types,
                positions, counter, range);

        try {
            NaiveBayes naiveBayes = new NaiveBayes();

            // try-with-resources: the original leaked both readers whenever
            // Instances parsing or classification threw.
            Instances instancesTraining;
            try (BufferedReader readerTraining = new BufferedReader(new FileReader(pathTrainingOutput))) {
                instancesTraining = new Instances(readerTraining);
            }
            instancesTraining.setClassIndex(instancesTraining.numAttributes() - 1);

            naiveBayes.buildClassifier(instancesTraining);

            Instances instancesTest;
            try (BufferedReader readerTest = new BufferedReader(new FileReader(pathTestOutput))) {
                instancesTest = new Instances(readerTest);
            }
            instancesTest.setClassIndex(instancesTest.numAttributes() - 1);

            Evaluation eval = new Evaluation(instancesTraining);
            eval.evaluateModel(naiveBayes, instancesTest);

            // Manual confusion-matrix counters; the comparisons assume a
            // binary class encoded as 0/1.
            int corrects = 0;
            int truePositive = 0;
            int trueNegative = 0;
            int falsePositive = 0;
            int falseNegative = 0;

            for (int i = 0; i < instancesTest.size(); i++) {
                Instance instance = instancesTest.get(i);
                double correctValue = instance.value(instance.attribute(instancesTest.numAttributes() - 1));
                double classification = naiveBayes.classifyInstance(instance);

                if (correctValue == classification) {
                    corrects++;
                }
                // The four outcomes are mutually exclusive, so chain them.
                if (correctValue == 1 && classification == 1) {
                    truePositive++;
                } else if (correctValue == 1 && classification == 0) {
                    falseNegative++;
                } else if (correctValue == 0 && classification == 1) {
                    falsePositive++;
                } else if (correctValue == 0 && classification == 0) {
                    trueNegative++;
                }
            }

            // try-with-resources closes the writer even if a println fails.
            try (PrintWriter writer = new PrintWriter(
                    new BufferedWriter(new FileWriter(pathNaivebayes, false)))) {
                writer.println(naiveBayes.toString());

                writer.println("");
                writer.println("");
                writer.println("Results");
                writer.println(eval.toSummaryString());
            }

            response.sendRedirect(
                    "NaiveBayes?action=view&corrects=" + corrects + "&totalTest=" + instancesTest.size()
                            + "&totalTrainig=" + instancesTraining.size() + "&range=" + range + "&truePositive="
                            + truePositive + "&trueNegative=" + trueNegative + "&falsePositive=" + falsePositive
                            + "&falseNegative=" + falseNegative + "&fileName=" + aux + "-naiveBayes.txt");

        } catch (Exception e) {
            System.out.println(e.getMessage());
            response.sendRedirect("Navigation?action=naiveBayes");
        }

        break;
    }
    default:
        response.sendError(404);
    }

}

From source file:core.classification.Classifiers.java

License:Open Source License

/**
 * Trains and cross-validates the symbol classifiers SCA, SCB, SCC1, SCC2 and
 * SCC3 from instances retrieved out of the project database.
 * <p>
 * Every classifier follows the same pipeline: run an SQL query through Weka's
 * {@code InstanceQuery}, use the last retrieved column as the class
 * attribute, set the classifier options, build the classifier, then print
 * 10-fold cross-validation results to stdout.
 *
 * @throws Exception if the database query, option parsing, training or
 *         evaluation fails
 */
public void trainSC() throws Exception {
    String sql;

    // ---
    // Connect to the database
    // ---
    InstanceQuery query = new InstanceQuery();
    query.setDatabaseURL(dbase);
    query.setUsername("");
    query.setPassword("");

    // ---
    // ---
    // SCA: Bayes net over the class-ratio table
    // ---
    // ---

    sql = "SELECT ";
    sql += "CR.ratio, CR.class ";
    sql += "FROM Class_ratio AS CR;";

    query.setQuery(sql);
    Instances data = query.retrieveInstances();

    // ---
    // Setting options: BayesNet with K2 search and a simple estimator
    // ---
    String[] options = Utils.splitOptions(
            "-D -Q weka.classifiers.bayes.net.search.local.K2 -- -P 1 -S BAYES -E weka.classifiers.bayes.net.estimate.SimpleEstimator -- -A 0.5");
    SCA.setOptions(options);
    // Last retrieved column (class) is the class attribute.
    data.setClassIndex(data.numAttributes() - 1);

    // ---
    // Train the classifier
    // ---
    System.out.println("Building SCA ...");
    SCA.buildClassifier(data);
    System.out.println("Done.");

    // ---
    // Classifier evaluation: 10-fold CV with a fixed seed for reproducibility
    // ---
    System.out.println("Cross-validation for SCA...");
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(SCA, data, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for SCA: \n\n", false));

    // ---
    // ---
    // SCB: trained on geometry features plus the parent's class
    // ---
    // ---

    sql = "SELECT ";
    sql += "Data.H2, Data.D2, Data.DX, ";
    sql += "Data.PARENT_CHAR AS PCLASS, ";
    sql += "Data.CLASS ";
    sql += "FROM Data ";
    sql += "WHERE (((Data.SEGERR)=0) AND (Data.PARENT_CHAR<>'0') );";

    query.setQuery(sql);
    data = query.retrieveInstances();

    // ---
    // Setting options (multilayer-perceptron style option string)
    // ---
    options = Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a");
    SCB.setOptions(options);
    data.setClassIndex(data.numAttributes() - 1);

    // ---
    // Train the classifier
    // ---
    System.out.println("Building SCB ...");
    SCB.buildClassifier(data);
    System.out.println("Done.");

    // ---
    // Classifier evaluation
    // ---
    System.out.println("Cross-validation for SCB...");
    eval = new Evaluation(data);
    eval.crossValidateModel(SCB, data, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for SCB: \n\n", false));

    // ---
    // ---
    // SCC: three context classifiers (left / enclosing / subscript features)
    // ---
    // ---

    // ----
    // SCC1: left-neighbour features
    // ----

    sql = "SELECT ";
    sql += "Data.LH, Data.LD, Data.LDX, Data.LCLASS, ";
    sql += "Data.CLASS ";
    sql += "FROM Data ";
    sql += "WHERE ( (Data.SEGERR)=0  AND ( (Data.LCLASS)<>'0' ) );";

    query.setQuery(sql);
    data = query.retrieveInstances();

    // ---
    // Setting options
    // ---
    options = Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a");
    SCC1.setOptions(options);
    data.setClassIndex(data.numAttributes() - 1);

    // ---
    // Train the classifier
    // ---
    System.out.println("Building SCC1 ...");
    SCC1.buildClassifier(data);
    System.out.println("Done.");

    // ---
    // Classifier evaluation
    // ---
    System.out.println("Cross-validation for SCC1...");
    eval = new Evaluation(data);
    eval.crossValidateModel(SCC1, data, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for SCC1: \n\n", false));

    // ----
    // SCC2: E-context features
    // ----

    sql = "SELECT ";
    sql += "Data.EH, Data.ED, Data.EDX, Data.ECLASS, ";
    sql += "Data.CLASS ";
    sql += "FROM Data ";
    sql += "WHERE ( (Data.SEGERR)=0  AND ( (Data.ECLASS)<>'0' ) );";

    query.setQuery(sql);
    data = query.retrieveInstances();

    // ---
    // Setting options
    // NOTE(review): the splitOptions call below is commented out, so SCC2
    // silently reuses the option array parsed for SCC1 — confirm intentional.
    // ---
    //      options = Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a");
    SCC2.setOptions(options);
    data.setClassIndex(data.numAttributes() - 1);

    // ---
    // Train the classifier
    // ---
    System.out.println("Building SCC2 ...");
    SCC2.buildClassifier(data);
    System.out.println("Done.");

    // ---
    // Classifier evaluation
    // ---
    System.out.println("Cross-validation for SCC2...");
    eval = new Evaluation(data);
    eval.crossValidateModel(SCC2, data, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for SCC2: \n\n", false));

    // ----
    // SCC3: S-context features
    // ----

    sql = "SELECT ";
    sql += "Data.SH, Data.SD, Data.SDX, Data.SCLASS, ";
    sql += "Data.CLASS ";
    sql += "FROM Data ";
    sql += "WHERE ( (Data.SEGERR)=0  AND ( (Data.SCLASS)<>'0' ) );";

    query.setQuery(sql);
    data = query.retrieveInstances();

    // ---
    // Setting options
    // NOTE(review): same as SCC2 — the SCC1 options are reused here.
    // ---
    //      options = Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a");
    SCC3.setOptions(options);
    data.setClassIndex(data.numAttributes() - 1);

    // ---
    // Train the classifier
    // ---
    System.out.println("Building SCC3 ...");
    SCC3.buildClassifier(data);
    System.out.println("Done.");

    // ---
    // Classifier evaluation
    // ---
    System.out.println("Cross-validation for SCC3...");
    eval = new Evaluation(data);
    eval.crossValidateModel(SCC3, data, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for SCC3: \n\n", false));

}

From source file:core.classification.Classifiers.java

License:Open Source License

/**
 * Trains and cross-validates the relation classifier (RC) on instances
 * pulled from the project database.
 *
 * @throws Exception if the query, option parsing, training or evaluation fails
 */
public void trainRC() throws Exception {
    // ---
    // Retrieve the training instances straight from the database.
    // ---
    InstanceQuery dbQuery = new InstanceQuery();
    dbQuery.setDatabaseURL(dbase);
    dbQuery.setUsername("");
    dbQuery.setPassword("");

    final String sql = "SELECT "
            + "Data.H2, Data.D2, Data.DX, "
            + "Data.CLASS, Data.PARENT_CHAR AS PCLASS, "
            + "Data.RELID "
            + "FROM Data "
            + "WHERE (((Data.SEGERR)=0) AND (Data.PARENT_CHAR<>'0') );";
    dbQuery.setQuery(sql);
    Instances trainingData = dbQuery.retrieveInstances();

    // ---
    // Configure RC: cost-sensitive wrapper around a multilayer perceptron,
    // then mark the last retrieved column as the class attribute.
    // ---
    //      String[] options = Utils.splitOptions("-L 0.2 -M 0.2 -N 50 -V 0 -S 0 -E 20 -H 5 ");
    String[] rcOptions = Utils.splitOptions(
            "-cost-matrix \"[0.0 1.0 1.0 0.1 0.1; 1.0 0.0 1.0 0.1 0.1; 1.0 1.0 0.0 0.1 0.1; 10.0 10.0 10.0 0.0 1.0; 10.0 10.0 10.0 1.0 0.0]\" -S 1 -W weka.classifiers.functions.MultilayerPerceptron -- -L 0.2 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a");
    RC.setOptions(rcOptions);
    trainingData.setClassIndex(trainingData.numAttributes() - 1);

    // ---
    // Train
    // ---
    System.out.println("Building RC...");
    RC.buildClassifier(trainingData);
    System.out.println("Done.");

    // ---
    // Evaluate with 10-fold cross-validation (fixed seed for reproducibility).
    // ---
    System.out.println("Cross-validation for RC...");
    Evaluation eval = new Evaluation(trainingData);
    eval.crossValidateModel(RC, trainingData, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for RC: \n\n", false));

}

From source file:core.classification.Classifiers.java

License:Open Source License

/**
 * Trains and cross-validates the yes/no classifier (YNC) on instances pulled
 * from the YNCdata table of the project database.
 *
 * @throws Exception if the query, option parsing, training or evaluation fails
 */
public void trainYNC() throws Exception {
    // ---
    // Retrieve the instances in the database
    // ---
    InstanceQuery query = new InstanceQuery();
    query.setDatabaseURL(dbase);
    query.setUsername("");
    query.setPassword("");

    String sql = "SELECT ";
    sql += "YNCdata.PCLASS, YNCdata.CCLASS, YNCdata.RAREA, YNCdata.H, YNCdata.D, YNCdata.V, ";
    sql += "YNCdata.YN ";
    sql += "FROM YNCdata ";

    query.setQuery(sql);
    Instances data = query.retrieveInstances();

    // ---
    // Setting options; last retrieved column (YN) is the class attribute.
    // ---
    String[] options = Utils.splitOptions("-R -N 3 -Q 1 -M 30");
    YNC.setOptions(options);
    data.setClassIndex(data.numAttributes() - 1);

    // ---
    // Train
    // ---
    // Fixed: the message said "Building YC..." although this method builds
    // YNC (every sibling train method prints its own classifier's name).
    System.out.println("Building YNC...");
    YNC.buildClassifier(data);
    System.out.println("Done.");

    // ---
    // Evaluation: 10-fold CV with a fixed seed for reproducibility.
    // ---
    System.out.println("Cross-validation for YNC...");
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(YNC, data, 10, new Random(1));
    System.out.println("Done.");
    System.out.println(eval.toSummaryString("\n Results for YNC: \n\n", false));

}

From source file:core.classifier.MyFirstClassifier.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems: one BinarySMO is trained for every
 * unordered pair of classes.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class (copy first so the caller's
        // dataset is not mutated)
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper
         is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    // Replace missing values unless checks are turned off entirely.
    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    // Convert nominal attributes to binary ones only when some non-class
    // attribute is actually non-numeric.
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    // Optional attribute scaling, depending on the configured filter type.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // Linear kernel iff a polynomial kernel with exponent exactly 1.
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers: one SMO per unordered pair (i, j), each
    // trained on the union of the two class subsets, shuffled.
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluates a clusterer with the options given in an array of
 * strings. It takes the string indicated by "-t" as training file, the
 * string indicated by "-T" as test file.
 * If the test file is missing, a stratified ten-fold
 * cross-validation is performed (distribution clusterers only).
 * Using "-x" you can change the number of
 * folds to be used, and using "-s" the random seed.
 * If the "-p" option is present it outputs the classification for
 * each test instance. If you provide the name of an object file using
 * "-l", a clusterer will be loaded from the given file. If you provide the
 * name of an object file using "-d", the clusterer built from the
 * training data will be saved to the given file.
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results 
 */
public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {

    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Random random;
    String trainFileName, testFileName, seedString, foldsString;
    String objectInputFileName, objectOutputFileName, attributeRangeString;
    String graphFileName;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    // "-h"/"-help" short-circuits into an exception carrying the usage text.
    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {

        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);

        throw new Exception("Help requested." + makeOptionString(clusterer, globalInfo));
    }

    try {
        // Get basic options (options the same for all clusterers
        //printClusterAssignments = Utils.getFlag('p', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        trainFileName = Utils.getOption('t', options);
        testFileName = Utils.getOption('T', options);
        graphFileName = Utils.getOption('g', options);

        // Check -p option: expects an attribute range, "0" meaning none.
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClusterAssignments = true;
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        // Validate the combinations of train/test/model-file options.
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }

            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test file given.");
            }
        } else {
            if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
                throw new Exception("Can't use both train and model file " + "unless -p specified.");
            }
        }

        seedString = Utils.getOption('s', options);

        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }

        foldsString = Utils.getOption('x', options);

        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
            doXval = true;
        }
    } catch (Exception e) {
        throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer, false));
    }

    try {
        if (trainFileName.length() != 0) {
            source = new DataSource(trainFileName);
            train = source.getStructure();

            // -c selects the class attribute (1-based; "first"/"last" allowed).
            String classString = Utils.getOption('c', options);
            if (classString.length() != 0) {
                if (classString.compareTo("last") == 0)
                    theClass = train.numAttributes();
                else if (classString.compareTo("first") == 0)
                    theClass = 1;
                else
                    theClass = Integer.parseInt(classString);

                if (theClass != -1) {
                    if (doXval || testFileName.length() != 0)
                        throw new Exception("Can only do class based evaluation on the " + "training data");

                    if (objectInputFileName.length() != 0)
                        throw new Exception("Can't load a clusterer and do class based " + "evaluation");

                    if (objectOutputFileName.length() != 0)
                        throw new Exception("Can't do class based evaluation and save clusterer");
                }
            } else {
                // if the dataset defines a class attribute, use it
                if (train.classIndex() != -1) {
                    theClass = train.classIndex() + 1;
                    System.err
                            .println("Note: using class attribute from dataset, i.e., attribute #" + theClass);
                }
            }

            if (theClass != -1) {
                if (theClass < 1 || theClass > train.numAttributes())
                    throw new Exception("Class is out of range!");

                if (!train.attribute(theClass - 1).isNominal())
                    throw new Exception("Class must be nominal!");

                train.setClassIndex(theClass - 1);
            }
        }
    } catch (Exception e) {
        throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options so cross-validation can re-instantiate the clusterer.
    if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
        Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
        ((OptionHandler) clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
        // Load the clusterer from file
        //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(
                new java.io.BufferedInputStream(new java.io.FileInputStream(objectInputFileName)));
        clusterer = (Clusterer) ois.readObject();
        // try and get the training header
        try {
            trainHeader = (Instances) ois.readObject();
        } catch (Exception ex) {
            // don't moan if we cant
        }
    } else {
        // Build the clusterer if no object file provided
        if (theClass == -1) {
            // No class attribute: incremental build if supported, batch otherwise.
            if (updateable) {
                clusterer.buildClusterer(source.getStructure());
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    ((UpdateableClusterer) clusterer).updateClusterer(inst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                clusterer.buildClusterer(source.getDataSet());
            }
        } else {
            // Class-based evaluation: cluster with the class attribute removed,
            // then compare clusters against the class labels.
            Remove removeClass = new Remove();
            removeClass.setAttributeIndices("" + theClass);
            removeClass.setInvertSelection(false);
            removeClass.setInputFormat(train);
            if (updateable) {
                Instances clusterTrain = Filter.useFilter(train, removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    removeClass.input(inst);
                    removeClass.batchFinished();
                    Instance clusterTrainInst = removeClass.output();
                    ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
            }
            ClusterEvaluationEX ce = new ClusterEvaluationEX();
            ce.setClusterer(clusterer);
            ce.evaluateClusterer(train, trainFileName);

            return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
        }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data */
    if (printClusterAssignments) {
        return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append(
            "\n\n=== Clustering stats for training data ===\n\n" + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
        // check header compatibility
        DataSource test = new DataSource(testFileName);
        Instances testStructure = test.getStructure();
        if (!trainHeader.equalHeaders(testStructure)) {
            throw new Exception("Training and testing data are not compatible\n");
        }

        text.append("\n\n=== Clustering stats for testing data ===\n\n"
                + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0)
            && (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        random = new Random(seed);
        random.setSeed(seed);
        train = source.getDataSet();
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        //SerializationHelper.write(objectOutputFileName, clusterer);
        saveClusterer(objectOutputFileName, clusterer, trainHeader);
    }

    // If classifier is drawable output string describing graph
    if ((clusterer instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) clusterer).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return text.toString();
}

From source file:core.DatabaseSaverEx.java

License:Open Source License

/** 
 * Writes the structure (header information) to a database by creating a new table.
 * /*from   w w  w .java2  s . c  om*/
 * @throws Exception if something goes wrong
 */
private void writeStructure() throws Exception {

    StringBuffer query = new StringBuffer();
    Instances structure = getInstances();
    query.append("CREATE TABLE ");
    if (m_tabName || m_tableName.equals(""))
        m_tableName = m_DataBaseConnection.maskKeyword(structure.relationName());
    if (m_DataBaseConnection.getUpperCase()) {
        m_tableName = m_tableName.toUpperCase();
        m_createInt = m_createInt.toUpperCase();
        m_createDouble = m_createDouble.toUpperCase();
        m_createText = m_createText.toUpperCase();
        m_createDate = m_createDate.toUpperCase();
    }
    m_tableName = m_tableName.replaceAll("[^\\w]", "_");
    m_tableName = m_DataBaseConnection.maskKeyword(m_tableName);
    query.append(m_tableName);
    if (structure.numAttributes() == 0)
        throw new Exception("Instances have no attribute.");
    query.append(" ( ");
    if (m_id) {
        if (m_DataBaseConnection.getUpperCase())
            m_idColumn = m_idColumn.toUpperCase();
        query.append(m_DataBaseConnection.maskKeyword(m_idColumn));
        query.append(" ");
        query.append(m_createInt);
        query.append(" PRIMARY KEY,");
    }
    for (int i = 0; i < structure.numAttributes(); i++) {
        Attribute att = structure.attribute(i);
        String attName = att.name();
        attName = attName.replaceAll("[^\\w]", "_");
        attName = m_DataBaseConnection.maskKeyword(attName);
        if (m_DataBaseConnection.getUpperCase())
            query.append(attName.toUpperCase());
        else
            query.append(attName);
        if (att.isDate())
            query.append(" " + m_createDate);
        else {
            if (att.isNumeric())
                query.append(" " + m_createDouble);
            else
                query.append(" " + m_createText);
        }
        if (i != structure.numAttributes() - 1)
            query.append(", ");
    }
    query.append(" )");
    //System.out.println(query.toString());
    m_DataBaseConnection.update(query.toString());
    m_DataBaseConnection.close();
    if (!m_DataBaseConnection.tableExists(m_tableName)) {
        throw new IOException("Table cannot be built.");
    }
}