Example usage for weka.core Instances classAttribute

List of usage examples for weka.core Instances classAttribute

Introduction

In this page you can find the example usage for weka.core Instances classAttribute.

Prototype


public Attribute classAttribute()

Source Link

Document

Returns the class attribute.

Usage

From source file:com.reactivetechnologies.analytics.core.eval.BaggingWithBuiltClassifiers.java

License:Open Source License

/**
 * Builds the bagging bookkeeping for an ensemble whose member classifiers
 * (m_Classifiers) were supplied pre-built: validates the data, replays the
 * bootstrap resampling so bag membership can be recorded, and -- if
 * requested -- computes the out-of-bag error of the pre-built members.
 * Unlike standard Weka Bagging, the members are never (re)trained here.
 *
 * @param data the training data
 * @throws Exception if the data cannot be handled, or the bag size is not
 *                   100% while out-of-bag error calculation is requested
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class; work on a copy so the
    // caller's dataset is left untouched
    data = new Instances(data);
    data.deleteWithMissingClass();

    /** Changed here: Use supplied classifier */
    //super.buildClassifier(data);
    /** End change */

    // OOB estimation requires full-size bags so every instance has the
    // usual chance of being left out of each bag
    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = (int) (data.numInstances() * (m_BagSizePercent / 100.0));
    Random random = new Random(m_Seed);

    // inBag[j][i] == true  <=>  instance i was sampled into bag j
    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        Instances bagData = null;

        // create the in-bag dataset (recording membership when OOB is on)
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                // shrink the bag down to the requested percentage
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        /** Changed here: Use supplied classifier */
        // NOTE(review): bags are still generated (keeping the random sequence
        // and inBag bookkeeping identical to standard Bagging) but are
        // intentionally NOT used for training -- members are pre-built.
        /*if (m_Classifier instanceof Randomizable) {
          ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
                
        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);*/
        /** End change */
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            // numeric class: one accumulator; nominal: one slot per class
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance, using only members whose
            // bag did NOT contain it (the out-of-bag members)
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;

                voteCount++;
                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                // empty branch deliberately skips normalizing an all-zero vote vector
                if (Utils.eq(Utils.sum(votes), 0)) {
                } else {
                    Utils.normalize(votes);
                }
                // NOTE(review): when voteCount == 0 (instance in every bag) this
                // still predicts class 0 via maxIndex -- confirm intended
                vote = Utils.maxIndex(votes); // predicted class
            }

            // error for instance, weighted by instance weight
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        // NOTE(review): outOfBagCount can be 0 if the dataset is empty,
        // yielding NaN here -- confirm acceptable
        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:com.reactivetechnologies.analytics.core.eval.StackingWithBuiltClassifiers.java

License:Open Source License

/**
 * Buildclassifier selects a classifier from the set of classifiers
 * by minimising error on the training data.
 *
 * @param data the training data to be used for generating the
 * boosted classifier./*from   w  w w  .  j  a  v  a 2s  .c om*/
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    if (m_MetaClassifier == null) {
        throw new IllegalArgumentException("No meta classifier has been set");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    Instances newData = new Instances(data);
    m_BaseFormat = new Instances(data, 0);
    newData.deleteWithMissingClass();

    Random random = new Random(m_Seed);
    newData.randomize(random);
    if (newData.classAttribute().isNominal()) {
        newData.stratify(m_NumFolds);
    }

    // Create meta level
    generateMetaLevel(newData, random);

    /** Changed here */
    // DO NOT Rebuilt all the base classifiers on the full training data
    /*for (int i = 0; i < m_Classifiers.length; i++) {
      getClassifier(i).buildClassifier(newData);
    }*/
    /** End change */
}

From source file:com.relationalcloud.main.Explanation.java

License:Open Source License

/**
 * Entry point: loads configuration, connects to the database, analyzes the
 * transaction workload, and for each touched table trains a J48 decision
 * tree that "explains" the partition labels, optionally writing the
 * explanations back into the database.
 *
 * @param args command-line arguments (unused; configuration comes from the
 *             property file named by the "prop" system property)
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        // NOTE(review): execution continues with an empty Properties object,
        // so later getProperty calls will return null -- confirm intended
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");

    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");

    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);

    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {

        // CREATE A DB CONNEctioN
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }
            // INSTANTIATE THE CLASSIFIER: a pruned J48 with the configured
            // pruning confidence
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;
            // ATTRIBUTE FILTERING: only worthwhile when there is more than
            // one class value to discriminate
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();

                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();

                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            // Collect the surviving attribute names, both for logging and
            // for populating the explanation columns later
            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            // strip the trailing ", "
            atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            // release the unfiltered copy
            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    // a single-leaf tree means the classifier puts every
                    // tuple in the same partition
                    if (classifier.measureNumLeaves() == 1) {

                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }

                    }

                    // POPULATING THE justifiedpartition column with the result of this
                    // classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }

                } else { // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");

        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }

        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:com.tum.classifiertest.FastRfBagging.java

License:Open Source License

/**
 * Computes the weighted out-of-bag error for a set of instances, scoring
 * each instance in parallel via {@code VotesCollector} tasks submitted to
 * the supplied thread pool.
 *
 * @param data       the instances
 * @param inBag      numTrees x numInstances indicating out-of-bag instances
 * @param threadPool the pool of threads
 *
 * @return the oob error
 */
private double computeOOBError(Instances data, boolean[][] inBag, ExecutorService threadPool)
        throws InterruptedException, ExecutionException {

    boolean regression = data.classAttribute().isNumeric();
    int numInstances = data.numInstances();

    // fan out: one vote-collection task per instance
    List<Future<Double>> pendingVotes = new ArrayList<Future<Double>>(numInstances);
    for (int idx = 0; idx < numInstances; idx++) {
        pendingVotes.add(threadPool.submit(new VotesCollector(m_Classifiers, idx, data, inBag)));
    }

    // fan in: accumulate the weighted error over all instances
    double weightSum = 0.0;
    double weightedError = 0.0;
    for (int idx = 0; idx < numInstances; idx++) {

        double prediction = pendingVotes.get(idx).get();
        double weight = data.instance(idx).weight();

        weightSum += weight;
        if (regression) {
            // absolute deviation for numeric classes
            weightedError += StrictMath.abs(prediction - data.instance(idx).classValue()) * weight;
        } else {
            // misclassification weight for nominal classes
            if (prediction != data.instance(idx).classValue())
                weightedError += weight;
        }
    }

    return weightedError / weightSum;
}

From source file:core.classifier.MyFirstClassifier.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems: the data is cleaned and filtered, then
 * one BinarySMO is trained for every unordered pair of class values.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper
         is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    // replace missing attribute values unless checks are off
    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    // binarize nominal attributes when any non-class attribute is not numeric
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    // optional attribute scaling (standardize / normalize / none)
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    // cache class info; a degree-1 PolyKernel lets BinarySMO use the
    // linear-kernel shortcut
    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers: one per unordered pair (i, j) of classes,
    // each trained on the union of the two class subsets
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluate the clusterer on a set of instances. Calculates clustering
 * statistics and stores cluster assigments for the instances in
 * m_clusterAssignments. Instances are read incrementally from the test
 * file when a name is given, otherwise from the supplied dataset.
 * 
 * @param test the set of instances to cluster
 * @param testFileName the name of the test file for incremental testing, 
 * if "" or null then not used
 * @param outputModel true if the clustering model is to be output as well
 * as the stats
 * 
 * @throws Exception if something goes wrong
 */
public void evaluateClusterer(Instances test, String testFileName, boolean outputModel) throws Exception {
    int i = 0;
    int cnum;
    double loglk = 0.0;
    int cc = m_Clusterer.numberOfClusters();
    m_numClusters = cc;
    double[] instanceStats = new double[cc]; // per-cluster instance counts
    Instances testRaw = null;
    boolean hasClass = (test.classIndex() >= 0);
    int unclusteredInstances = 0;
    Vector<Double> clusterAssignments = new Vector<Double>();
    Filter filter = null;
    DataSource source = null;
    Instance inst;

    if (testFileName == null)
        testFileName = "";

    // load data: from file if a name was given, else from the dataset itself
    if (testFileName.length() != 0)
        source = new DataSource(testFileName);
    else
        source = new DataSource(test);
    testRaw = source.getStructure(test.classIndex());

    // If class is set then do class based evaluation as well; the class
    // attribute is removed before instances are handed to the clusterer
    if (hasClass) {
        if (testRaw.classAttribute().isNumeric())
            throw new Exception("ClusterEvaluation: Class must be nominal!");

        filter = new Remove();
        ((Remove) filter).setAttributeIndices("" + (testRaw.classIndex() + 1));
        ((Remove) filter).setInvertSelection(false);
        filter.setInputFormat(testRaw);
    }

    i = 0;
    while (source.hasMoreElements(testRaw)) {
        // next instance
        inst = source.nextElement(testRaw);
        if (filter != null) {
            filter.input(inst);
            filter.batchFinished();
            inst = filter.output();
        }

        // cluster the instance; -1 marks "could not be clustered"
        cnum = -1;
        try {
            if (m_Clusterer instanceof DensityBasedClusterer) {
                // only density-based clusterers contribute to the log likelihood
                loglk += ((DensityBasedClusterer) m_Clusterer).logDensityForInstance(inst);
                cnum = m_Clusterer.clusterInstance(inst);
                clusterAssignments.add((double) cnum);
            } else {
                cnum = m_Clusterer.clusterInstance(inst);
                clusterAssignments.add((double) cnum);
            }
        } catch (Exception e) {
            // instance could not be clustered: record -1 and count it
            clusterAssignments.add(-1.0);
            unclusteredInstances++;
        }

        if (cnum != -1) {
            instanceStats[cnum]++;
        }
    }

    // NOTE(review): loglk stays 0 for non-density-based clusterers; the
    // division below then just yields 0 -- confirm intended
    double sum = Utils.sum(instanceStats);
    loglk /= sum;
    m_logL = loglk;
    m_clusterAssignments = new double[clusterAssignments.size()];
    for (i = 0; i < clusterAssignments.size(); i++) {
        m_clusterAssignments[i] = clusterAssignments.get(i);
    }
    // field widths sized to the number of digits needed
    int numInstFieldWidth = (int) ((Math.log(clusterAssignments.size()) / Math.log(10)) + 1);

    if (outputModel) {
        m_clusteringResults.append(m_Clusterer.toString());
    }
    m_clusteringResults.append("Clustered Instances\n\n");
    int clustFieldWidth = (int) ((Math.log(cc) / Math.log(10)) + 1);
    for (i = 0; i < cc; i++) {
        if (instanceStats[i] > 0)
            m_clusteringResults.append(Utils.doubleToString((double) i, clustFieldWidth, 0) + "      "
                    + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0) + " ("
                    + Utils.doubleToString((instanceStats[i] / sum * 100.0), 3, 0) + "%)\n");
    }

    if (unclusteredInstances > 0)
        m_clusteringResults.append("\nUnclustered instances : " + unclusteredInstances);

    if (m_Clusterer instanceof DensityBasedClusterer)
        m_clusteringResults.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n");

    if (hasClass) {
        evaluateClustersWithRespectToClass(test, testFileName);
    }
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluates cluster assignments with respect to actual class labels.
 * Assumes that m_Clusterer has been trained and tested on 
 * inst (minus the class): m_clusterAssignments must already hold one
 * assignment per instance, in source order.
 * 
 * @param inst the instances (including class) to evaluate with respect to
 * @param fileName the name of the test file for incremental testing, 
 * if "" or null then not used
 * @throws Exception if something goes wrong
 */
private void evaluateClustersWithRespectToClass(Instances inst, String fileName) throws Exception {

    int numClasses = inst.classAttribute().numValues();
    // counts[cluster][class]: co-occurrence of assignment and true label
    int[][] counts = new int[m_numClusters][numClasses];
    int[] clusterTotals = new int[m_numClusters];
    // best/current carry the search state for mapClasses; the last slot
    // holds the (minimum) number of incorrectly clustered instances
    double[] best = new double[m_numClusters + 1];
    double[] current = new double[m_numClusters + 1];
    DataSource source = null;
    Instances instances = null;
    Instance instance = null;
    int i;
    int numInstances;

    if (fileName == null)
        fileName = "";

    // re-read the data from the same source used during clustering so the
    // order matches m_clusterAssignments
    if (fileName.length() != 0) {
        source = new DataSource(fileName);
    } else
        source = new DataSource(inst);
    instances = source.getStructure(inst.classIndex());

    i = 0;
    while (source.hasMoreElements(instances)) {
        instance = source.nextElement(instances);
        // skip instances that could not be clustered (assignment < 0)
        if (m_clusterAssignments[i] >= 0) {
            counts[(int) m_clusterAssignments[i]][(int) instance.classValue()]++;
            clusterTotals[(int) m_clusterAssignments[i]]++;
        }
        i++;
    }
    numInstances = i;

    // search for the minimum-error class-to-cluster mapping
    best[m_numClusters] = Double.MAX_VALUE;
    mapClasses(m_numClusters, 0, counts, clusterTotals, current, best, 0);

    m_clusteringResults.append("\n\nClass attribute: " + inst.classAttribute().name() + "\n");
    m_clusteringResults.append("Classes to Clusters:\n");
    String matrixString = toMatrixString(counts, clusterTotals, new Instances(inst, 0));
    m_clusteringResults.append(matrixString).append("\n");

    int Cwidth = 1 + (int) (Math.log(m_numClusters) / Math.log(10));
    // add the minimum error assignment
    for (i = 0; i < m_numClusters; i++) {
        if (clusterTotals[i] > 0) {
            m_clusteringResults.append("Cluster " + Utils.doubleToString((double) i, Cwidth, 0));
            m_clusteringResults.append(" <-- ");

            if (best[i] < 0) {
                m_clusteringResults.append("No class\n");
            } else {
                m_clusteringResults.append(inst.classAttribute().value((int) best[i])).append("\n");
            }
        }
    }
    m_clusteringResults.append("\nIncorrectly clustered instances :\t" + best[m_numClusters] + "\t"
            + (Utils.doubleToString((best[m_numClusters] / numInstances * 100.0), 8, 4)) + " %\n");

    // copy the class assignments
    m_classToCluster = new int[m_numClusters];
    for (i = 0; i < m_numClusters; i++) {
        m_classToCluster[i] = (int) best[i];
    }
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Renders a "confusion"-style matrix showing how class labels (rows) are
 * distributed over cluster assignments (columns). Clusters that received
 * no instances are omitted from the output.
 *
 * @param counts        the counts of classes for each cluster
 * @param clusterTotals total number of examples in each cluster
 * @param inst          the training instances (with class)
 * @return the "confusion" style matrix as string
 * @throws Exception if matrix can't be generated
 */
private String toMatrixString(int[][] counts, int[] clusterTotals, Instances inst) throws Exception {
    StringBuffer out = new StringBuffer();

    // find the largest count anywhere in the matrix to size the columns
    int largest = 0;
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
        for (int cls = 0; cls < counts[cluster].length; cls++) {
            if (counts[cluster][cls] > largest) {
                largest = counts[cluster][cls];
            }
        }
    }

    int colWidth = 1
            + Math.max((int) (Math.log(largest) / Math.log(10)), (int) (Math.log(m_numClusters) / Math.log(10)));

    out.append("\n");

    // header row: the ids of the non-empty clusters
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
        if (clusterTotals[cluster] > 0) {
            out.append(" ").append(Utils.doubleToString((double) cluster, colWidth, 0));
        }
    }
    out.append("  <-- assigned to cluster\n");

    // one row per class value, one column per non-empty cluster
    for (int cls = 0; cls < counts[0].length; cls++) {

        for (int cluster = 0; cluster < m_numClusters; cluster++) {
            if (clusterTotals[cluster] > 0) {
                out.append(" ").append(Utils.doubleToString((double) counts[cluster][cls], colWidth, 0));
            }
        }
        out.append(" | ").append(inst.classAttribute().value(cls)).append("\n");
    }

    return out.toString();
}

From source file:core.TextDirectoryLoader.java

License:Open Source License

/**
 * Process input directories/files incrementally: each call returns one
 * document as an Instance, cycling round-robin over the class
 * subdirectories so consecutive instances come from different classes.
 *
 * @param structure the dataset header (class attribute values name the
 *                  subdirectories to read from)
 * @return the next instance, or null when all documents are consumed
 * @throws IOException if a problem occurs
 */
@Override
public Instance getNextInstance(Instances structure) throws IOException {
    // throw new
    // IOException("TextDirectoryLoader can't read data sets incrementally.");

    String directoryPath = getDirectory().getAbsolutePath();
    Attribute classAtt = structure.classAttribute();
    // lazily index the remaining files of each class on first call
    if (m_filesByClass == null) {
        m_filesByClass = new ArrayList<LinkedList<String>>();
        for (int i = 0; i < classAtt.numValues(); i++) {
            File classDir = new File(directoryPath + File.separator + classAtt.value(i));
            String[] files = classDir.list();
            LinkedList<String> classDocs = new LinkedList<String>();
            for (String cd : files) {
                File txt = new File(directoryPath + File.separator + classAtt.value(i) + File.separator + cd);
                if (txt.isFile()) {
                    classDocs.add(cd);
                }
            }
            m_filesByClass.add(classDocs);
        }
    }

    // cycle through the classes until one with documents left is found,
    // or until every class has been checked once (count guards termination)
    int count = 0;
    LinkedList<String> classContents = m_filesByClass.get(m_lastClassDir);
    boolean found = (classContents.size() > 0);
    while (classContents.size() == 0) {
        m_lastClassDir++;
        count++;
        if (m_lastClassDir == structure.classAttribute().numValues()) {
            m_lastClassDir = 0; // wrap around to the first class
        }
        classContents = m_filesByClass.get(m_lastClassDir);
        if (classContents.size() > 0) {
            found = true; // we have an instance we can create
            break;
        }
        if (count == structure.classAttribute().numValues()) {
            break; // must be finished
        }
    }

    if (found) {
        // consume the next document of the current class
        String nextDoc = classContents.poll();
        File txt = new File(
                directoryPath + File.separator + classAtt.value(m_lastClassDir) + File.separator + nextDoc);

        // read the whole file, honoring the configured charset if any
        BufferedReader is;
        if (m_charSet == null || m_charSet.length() == 0) {
            is = new BufferedReader(new InputStreamReader(new FileInputStream(txt)));
        } else {
            is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet));
        }
        StringBuffer txtStr = new StringBuffer();
        int c;
        while ((c = is.read()) != -1) {
            txtStr.append((char) c);
        }

        // attribute layout: [text, (filename,) class]
        double[] newInst = null;
        if (m_OutputFilename) {
            newInst = new double[3];
        } else {
            newInst = new double[2];
        }

        newInst[0] = 0;
        structure.attribute(0).setStringValue(txtStr.toString());

        if (m_OutputFilename) {
            newInst[1] = 0;
            structure.attribute(1).setStringValue(txt.getAbsolutePath());
        }
        newInst[structure.classIndex()] = m_lastClassDir;
        Instance inst = new DenseInstance(1.0, newInst);
        inst.setDataset(structure);
        is.close();

        // advance to the next class for the following call
        m_lastClassDir++;
        if (m_lastClassDir == structure.classAttribute().numValues()) {
            m_lastClassDir = 0;
        }

        return inst;
    } else {
        return null; // done!
    }
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Initializes all the counters for the evaluation and also takes a
 * cost matrix as parameter.
 * Use <code>useNoPriors()</code> if the dataset is the test set and you
 * can't initialize with the priors from the training set via 
 * <code>setPriors(Instances)</code>.
 *
 * @param data    set of training instances, to get some header 
 *          information and prior class distribution information
 * @param costMatrix    the cost matrix---if null, default costs will be used
 * @throws Exception    if cost matrix is not compatible with 
 *          data, the class is not defined or the class is numeric
 * @see       #useNoPriors()
 * @see       #setPriors(Instances)
 */
public Evaluation_D(Instances data, CostMatrix costMatrix) throws Exception {

    m_NumFolds = 1;
    m_NumClasses = data.numClasses();
    m_ClassIsNominal = data.classAttribute().isNominal();

    // per-class bookkeeping only makes sense for nominal classes
    if (m_ClassIsNominal) {
        m_ConfusionMatrix = new double[m_NumClasses][m_NumClasses];
        m_ClassNames = new String[m_NumClasses];
        for (int c = 0; c < m_NumClasses; c++) {
            m_ClassNames[c] = data.classAttribute().value(c);
        }
    }

    // a cost matrix requires a nominal class of matching dimension
    m_CostMatrix = costMatrix;
    if (m_CostMatrix != null) {
        if (!m_ClassIsNominal) {
            throw new Exception("Class has to be nominal if cost matrix given!");
        }
        if (m_CostMatrix.size() != m_NumClasses) {
            throw new Exception("Cost matrix not compatible with data!");
        }
    }

    // allocate priors and fill them from the training data
    m_ClassPriors = new double[m_NumClasses];
    setPriors(data);
    m_MarginCounts = new double[k_MarginResolution + 1];
}