List of usage examples for the weka.experiment.OutputZipper constructor
public OutputZipper(File destination) throws Exception
From source file: milk.experiment.MICrossValidationResultProducer.java
License: Open Source License
/** * Gets the results for a specified run number. Different run * numbers correspond to different randomizations of the data. Results * produced should be sent to the current ResultListener *//from w ww . j av a 2 s .c o m * @param run the run number to get results for. * @exception Exception if a problem occurs while getting the results */ public void doRun(int run) throws Exception { if (getRawOutput()) { if (m_ZipDest == null) { m_ZipDest = new OutputZipper(m_OutputFile); } } if (m_Instances == null) { throw new Exception("No Instances set"); } // Randomize on a copy of the original dataset Exemplars runInstances = new Exemplars(m_Instances); Random random = new Random(run); runInstances.randomize(random); if (runInstances.classAttribute().isNominal()) { runInstances.stratify(m_NumFolds); } for (int fold = 0; fold < m_NumFolds; fold++) { // Add in some fields to the key like run and fold number, dataset name Object[] seKey = m_SplitEvaluator.getKey(); Object[] key = new Object[seKey.length + 3]; key[0] = Utils.backQuoteChars(m_Instances.relationName()); key[1] = "" + run; key[2] = "" + (fold + 1); System.arraycopy(seKey, 0, key, 3, seKey.length); if (m_ResultListener.isResultRequired(this, key)) { Exemplars train = runInstances.trainCV(m_NumFolds, fold, random); Exemplars test = runInstances.testCV(m_NumFolds, fold); try { Object[] seResults = m_SplitEvaluator.getResult(train, test); Object[] results = new Object[seResults.length + 1]; results[0] = getTimestamp(); System.arraycopy(seResults, 0, results, 1, seResults.length); if (m_debugOutput) { String resultName = ("" + run + "." + (fold + 1) + "." + Utils.backQuoteChars(runInstances.relationName()) + "." 
+ m_SplitEvaluator.toString()).replace(' ', '_'); resultName = Utils.removeSubstring(resultName, "weka.classifiers."); resultName = Utils.removeSubstring(resultName, "weka.filters."); resultName = Utils.removeSubstring(resultName, "weka.attributeSelection."); m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName); } m_ResultListener.acceptResult(this, key, results); } catch (Exception ex) { // Save the train and test datasets for debugging purposes? throw ex; } } } }
From source file: milk.experiment.MIRandomSplitResultProducer.java
License: Open Source License
/** * Gets the results for a specified run number. Different run * numbers correspond to different randomizations of the data. Results * produced should be sent to the current ResultListener */* w w w . j a va2 s. c o m*/ * @param run the run number to get results for. * @exception Exception if a problem occurs while getting the results */ public void doRun(int run) throws Exception { if (getRawOutput()) { if (m_ZipDest == null) { m_ZipDest = new OutputZipper(m_OutputFile); } } if (m_Instances == null) { throw new Exception("No Instances set"); } // Add in some fields to the key like run number, dataset name Object[] seKey = m_SplitEvaluator.getKey(); Object[] key = new Object[seKey.length + 2]; key[0] = Utils.backQuoteChars(m_Instances.relationName()); key[1] = "" + run; System.arraycopy(seKey, 0, key, 2, seKey.length); if (m_ResultListener.isResultRequired(this, key)) { // Randomize on a copy of the original dataset Exemplars runInstances = new Exemplars(m_Instances); Exemplars train; Exemplars test; if (!m_randomize) { // Don't do any randomization int trainSize = Utils.round(runInstances.numExemplars() * m_TrainPercent / 100); int testSize = runInstances.numExemplars() - trainSize; train = new Exemplars(runInstances, 0, trainSize); test = new Exemplars(runInstances, trainSize, testSize); } else { Random rand = new Random(run); runInstances.randomize(rand); // Nominal class if (runInstances.classAttribute().isNominal()) { // create the subset for each classs int numClasses = runInstances.numClasses(); Exemplars[] subsets = new Exemplars[numClasses + 1]; for (int i = 0; i < numClasses + 1; i++) { subsets[i] = new Exemplars(runInstances, 10); } // divide instances into subsets for (int i = 0; i < runInstances.numExemplars(); i++) { Exemplar inst = runInstances.exemplar(i); if (inst.getInstances().instance(0).classIsMissing()) { subsets[numClasses].add(inst); } else { subsets[(int) inst.classValue()].add(inst); } } // Compactify them for (int i = 0; i < numClasses 
+ 1; i++) { subsets[i].compactify(); } // merge into train and test sets train = new Exemplars(runInstances, runInstances.numExemplars()); test = new Exemplars(runInstances, runInstances.numExemplars()); for (int i = 0; i < numClasses + 1; i++) { int trainSize = Utils.probRound(subsets[i].numExemplars() * m_TrainPercent / 100, rand); for (int j = 0; j < trainSize; j++) { train.add(subsets[i].exemplar(j)); } for (int j = trainSize; j < subsets[i].numExemplars(); j++) { test.add(subsets[i].exemplar(j)); } // free memory subsets[i] = null; } train.compactify(); test.compactify(); // randomize the final sets train.randomize(rand); test.randomize(rand); } else { // Numeric target int trainSize = Utils.probRound(runInstances.numExemplars() * m_TrainPercent / 100, rand); int testSize = runInstances.numExemplars() - trainSize; train = new Exemplars(runInstances, 0, trainSize); test = new Exemplars(runInstances, trainSize, testSize); } } try { Object[] seResults = m_SplitEvaluator.getResult(train, test); Object[] results = new Object[seResults.length + 1]; results[0] = getTimestamp(); System.arraycopy(seResults, 0, results, 1, seResults.length); if (m_debugOutput) { String resultName = ("" + run + "." + Utils.backQuoteChars(runInstances.relationName()) + "." + m_SplitEvaluator.toString()).replace(' ', '_'); resultName = Utils.removeSubstring(resultName, "weka.classifiers."); resultName = Utils.removeSubstring(resultName, "weka.filters."); resultName = Utils.removeSubstring(resultName, "weka.attributeSelection."); m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName); } m_ResultListener.acceptResult(this, key, results); } catch (Exception ex) { // Save the train and test datasets for debugging purposes? throw ex; } } }