Example usage for weka.experiment OutputZipper OutputZipper

List of usage examples for weka.experiment OutputZipper OutputZipper

Introduction

On this page you can find example usages of the weka.experiment.OutputZipper constructor.

Prototype

public OutputZipper(File destination) throws Exception 

Source Link

Document

Constructor.

Usage

From source file:milk.experiment.MICrossValidationResultProducer.java

License:Open Source License

/**
   * Gets the results for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Results
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get results for.
   * @exception Exception if a problem occurs while getting the results
   */
  public void doRun(int run) throws Exception {

      // Create the output zipper once, the first time raw output is requested.
      if (getRawOutput() && m_ZipDest == null) {
          m_ZipDest = new OutputZipper(m_OutputFile);
      }

      if (m_Instances == null) {
          throw new Exception("No Instances set");
      }

      // Randomize a copy of the dataset, using the run number as the seed.
      final Exemplars shuffled = new Exemplars(m_Instances);
      final Random rng = new Random(run);
      shuffled.randomize(rng);
      if (shuffled.classAttribute().isNominal()) {
          shuffled.stratify(m_NumFolds);
      }

      for (int foldIndex = 0; foldIndex < m_NumFolds; foldIndex++) {
          // Folds are reported 1-based in both the key and the zip entry name.
          final int foldNumber = foldIndex + 1;

          // Key layout: [relation name, run, fold, split-evaluator key...]
          final Object[] evaluatorKey = m_SplitEvaluator.getKey();
          final Object[] resultKey = new Object[evaluatorKey.length + 3];
          resultKey[0] = Utils.backQuoteChars(m_Instances.relationName());
          resultKey[1] = String.valueOf(run);
          resultKey[2] = String.valueOf(foldNumber);
          System.arraycopy(evaluatorKey, 0, resultKey, 3, evaluatorKey.length);

          // Skip folds whose result the listener does not need.
          if (!m_ResultListener.isResultRequired(this, resultKey)) {
              continue;
          }

          final Exemplars trainSet = shuffled.trainCV(m_NumFolds, foldIndex, rng);
          final Exemplars testSet = shuffled.testCV(m_NumFolds, foldIndex);

          // Result layout: [timestamp, split-evaluator results...]
          final Object[] evaluatorResults = m_SplitEvaluator.getResult(trainSet, testSet);
          final Object[] row = new Object[evaluatorResults.length + 1];
          row[0] = getTimestamp();
          System.arraycopy(evaluatorResults, 0, row, 1, evaluatorResults.length);

          if (m_debugOutput) {
              // Entry name: run.fold.relation.evaluator, spaces replaced by
              // underscores and the listed package prefixes removed.
              String entryName = run + "." + foldNumber + "."
                      + Utils.backQuoteChars(shuffled.relationName()) + "."
                      + m_SplitEvaluator.toString();
              entryName = entryName.replace(' ', '_');
              final String[] strippedPrefixes = {
                  "weka.classifiers.", "weka.filters.", "weka.attributeSelection."
              };
              for (String prefix : strippedPrefixes) {
                  entryName = Utils.removeSubstring(entryName, prefix);
              }
              m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), entryName);
          }

          // Any exception raised above propagates unchanged to the caller.
          m_ResultListener.acceptResult(this, resultKey, row);
      }
  }

From source file:milk.experiment.MIRandomSplitResultProducer.java

License:Open Source License

/**
   * Gets the results for a specified run number. Different run
   * numbers correspond to different randomizations of the data. Results
   * produced should be sent to the current ResultListener
   *
   * @param run the run number to get results for.
   * @exception Exception if a problem occurs while getting the results
   */
  public void doRun(int run) throws Exception {

      // Create the output zipper once, the first time raw output is requested.
      if (getRawOutput() && m_ZipDest == null) {
          m_ZipDest = new OutputZipper(m_OutputFile);
      }

      if (m_Instances == null) {
          throw new Exception("No Instances set");
      }

      // Key layout: [relation name, run, split-evaluator key...]
      final Object[] evaluatorKey = m_SplitEvaluator.getKey();
      final Object[] resultKey = new Object[evaluatorKey.length + 2];
      resultKey[0] = Utils.backQuoteChars(m_Instances.relationName());
      resultKey[1] = String.valueOf(run);
      System.arraycopy(evaluatorKey, 0, resultKey, 2, evaluatorKey.length);

      // Nothing to do when the listener does not need this result.
      if (!m_ResultListener.isResultRequired(this, resultKey)) {
          return;
      }

      // All splitting is done on a copy of the dataset.
      final Exemplars data = new Exemplars(m_Instances);
      final Exemplars trainSet;
      final Exemplars testSet;

      if (m_randomize) {
          // The run number seeds the shuffle and every later random draw.
          final Random rng = new Random(run);
          data.randomize(rng);

          if (data.classAttribute().isNominal()) {
              // One bucket per class value, plus a last bucket for exemplars
              // whose class is missing.
              final int missingBucket = data.numClasses();
              final int bucketCount = missingBucket + 1;
              final Exemplars[] buckets = new Exemplars[bucketCount];
              for (int b = 0; b < bucketCount; b++) {
                  buckets[b] = new Exemplars(data, 10);
              }

              // Distribute every exemplar into its bucket; the missing-class
              // check looks at the first instance of the exemplar.
              for (int e = 0; e < data.numExemplars(); e++) {
                  final Exemplar exemplar = data.exemplar(e);
                  final int target = exemplar.getInstances().instance(0).classIsMissing()
                          ? missingBucket
                          : (int) exemplar.classValue();
                  buckets[target].add(exemplar);
              }

              for (Exemplars bucket : buckets) {
                  bucket.compactify();
              }

              // Split each bucket separately: the first part goes to the
              // training set, the remainder to the test set.
              trainSet = new Exemplars(data, data.numExemplars());
              testSet = new Exemplars(data, data.numExemplars());
              for (int b = 0; b < bucketCount; b++) {
                  final Exemplars bucket = buckets[b];
                  final int bucketSize = bucket.numExemplars();
                  final int cut = Utils.probRound(bucketSize * m_TrainPercent / 100, rng);
                  for (int k = 0; k < cut; k++) {
                      trainSet.add(bucket.exemplar(k));
                  }
                  for (int k = cut; k < bucketSize; k++) {
                      testSet.add(bucket.exemplar(k));
                  }
                  // Drop the array's reference to the bucket to free memory.
                  buckets[b] = null;
              }
              trainSet.compactify();
              testSet.compactify();

              // Shuffle the merged sets so they are not grouped by class.
              trainSet.randomize(rng);
              testSet.randomize(rng);
          } else {
              // Numeric target: a single cut through the shuffled data.
              final int total = data.numExemplars();
              final int cut = Utils.probRound(total * m_TrainPercent / 100, rng);
              trainSet = new Exemplars(data, 0, cut);
              testSet = new Exemplars(data, cut, total - cut);
          }
      } else {
          // No randomization: split the data in its existing order.
          final int total = data.numExemplars();
          final int cut = Utils.round(total * m_TrainPercent / 100);
          trainSet = new Exemplars(data, 0, cut);
          testSet = new Exemplars(data, cut, total - cut);
      }

      // Result layout: [timestamp, split-evaluator results...]
      final Object[] evaluatorResults = m_SplitEvaluator.getResult(trainSet, testSet);
      final Object[] row = new Object[evaluatorResults.length + 1];
      row[0] = getTimestamp();
      System.arraycopy(evaluatorResults, 0, row, 1, evaluatorResults.length);

      if (m_debugOutput) {
          // Entry name: run.relation.evaluator, spaces replaced by
          // underscores and the listed package prefixes removed.
          String entryName = run + "." + Utils.backQuoteChars(data.relationName()) + "."
                  + m_SplitEvaluator.toString();
          entryName = entryName.replace(' ', '_');
          final String[] strippedPrefixes = {
              "weka.classifiers.", "weka.filters.", "weka.attributeSelection."
          };
          for (String prefix : strippedPrefixes) {
              entryName = Utils.removeSubstring(entryName, prefix);
          }
          m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), entryName);
      }

      // Any exception raised above propagates unchanged to the caller.
      m_ResultListener.acceptResult(this, resultKey, row);
  }