List of usage examples for weka.classifiers.AbstractClassifier.forName
public static Classifier forName(String classifierName, String[] options) throws Exception
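Before the collected examples, here is a minimal, self-contained sketch of the typical call pattern. The classifier name (weka.classifiers.trees.J48) and the -C option are illustrative choices only, not taken from the examples below:

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.Utils;

public class ForNameDemo {
    public static void main(String[] args) throws Exception {
        // Load the class by its fully qualified name, cast it to Classifier,
        // and apply the remaining options via setOptions.
        Classifier cls = AbstractClassifier.forName("weka.classifiers.trees.J48",
                new String[] { "-C", "0.25" });
        // Print the resulting configuration as a command line.
        System.out.println(Utils.toCommandLine(cls));
    }
}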
From source file:WrapperSubset.java
License:Open Source License
/**
 * Parses a given list of options.
 * <p/>
 *
 * <!-- options-start --> Valid options are:
 * <p/>
 *
 * <pre>
 * -B <base learner>
 *  class name of base learner to use for accuracy estimation.
 *  Place any classifier options LAST on the command line
 *  following a "--". eg.:
 *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
 *  (default: weka.classifiers.rules.ZeroR)
 * </pre>
 *
 * <pre>
 * -F <num>
 *  number of cross validation folds to use for estimating accuracy.
 *  (default=5)
 * </pre>
 *
 * <pre>
 * -R <seed>
 *  Seed for cross validation accuracy estimation.
 *  (default = 1)
 * </pre>
 *
 * <pre>
 * -T <num>
 *  threshold by which to execute another cross validation
 *  (standard deviation---expressed as a percentage of the mean).
 *  (default: 0.01 (1%))
 * </pre>
 *
 * <pre>
 * -E <acc | rmse | mae | f-meas | auc | auprc>
 *  Performance evaluation measure to use for selecting attributes.
 *  (Default = accuracy for discrete class and rmse for numeric class)
 * </pre>
 *
 * <pre>
 * -IRclass <label | index>
 *  Optional class value (label or 1-based index) to use in conjunction with
 *  IR statistics (f-meas, auc or auprc). Omitting this option will use
 *  the class-weighted average.
 * </pre>
 *
 * <pre>
 * Options specific to scheme weka.classifiers.rules.ZeroR:
 * </pre>
 *
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
    String optionString;
    resetOptions();

    optionString = Utils.getOption('B', options);
    if (optionString.length() == 0) {
        optionString = ZeroR.class.getName();
    }
    // Instantiate the base learner; any options following "--" are handed on to it.
    setClassifier(AbstractClassifier.forName(optionString, Utils.partitionOptions(options)));

    optionString = Utils.getOption('F', options);
    if (optionString.length() != 0) {
        setFolds(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
        setSeed(Integer.parseInt(optionString));
    }

    // optionString = Utils.getOption('S', options);
    // if (optionString.length() != 0) {
    //     seed = Integer.parseInt(optionString);
    // }

    optionString = Utils.getOption('T', options);
    if (optionString.length() != 0) {
        Double temp = Double.valueOf(optionString);
        setThreshold(temp.doubleValue());
    }

    optionString = Utils.getOption('E', options);
    if (optionString.length() != 0) {
        if (optionString.equals("acc")) {
            setEvaluationMeasure(new SelectedTag(EVAL_ACCURACY, TAGS_EVALUATION));
        } else if (optionString.equals("rmse")) {
            setEvaluationMeasure(new SelectedTag(EVAL_RMSE, TAGS_EVALUATION));
        } else if (optionString.equals("mae")) {
            setEvaluationMeasure(new SelectedTag(EVAL_MAE, TAGS_EVALUATION));
        } else if (optionString.equals("f-meas")) {
            setEvaluationMeasure(new SelectedTag(EVAL_FMEASURE, TAGS_EVALUATION));
        } else if (optionString.equals("auc")) {
            setEvaluationMeasure(new SelectedTag(EVAL_AUC, TAGS_EVALUATION));
        } else if (optionString.equals("auprc")) {
            setEvaluationMeasure(new SelectedTag(EVAL_AUPRC, TAGS_EVALUATION));
        } else {
            throw new IllegalArgumentException("Invalid evaluation measure");
        }
    }

    optionString = Utils.getOption("IRClass", options);
    if (optionString.length() > 0) {
        setIRClassValue(optionString);
    }
}
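As a rough sketch of how the setOptions method above might be driven: weka.core.Utils.splitOptions turns a command-line style string into the options array, and the "--" separator hands the trailing options to the base learner, as the Javadoc describes. The WrapperSubset class name and its no-arg constructor are assumed here purely for illustration:

import weka.core.Utils;

public class WrapperSubsetOptionsDemo {
    public static void main(String[] args) throws Exception {
        // Split a command-line style string into an option array; everything after "--"
        // is passed on to the NaiveBayes base learner via Utils.partitionOptions.
        String[] options = Utils.splitOptions(
                "-B weka.classifiers.bayes.NaiveBayes -F 10 -E auc -- -K");

        WrapperSubset eval = new WrapperSubset(); // class from this source file; no-arg constructor assumed
        eval.setOptions(options);
    }
}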
From source file:com.deafgoat.ml.prognosticator.AppClassifier.java
License:Apache License
/**
 * Initialize instances classifier.
 *
 * @throws Exception
 *             If the classifier cannot be initialized.
 */
public void initializeClassifier() throws Exception {
    String base = "weka.classifiers.";
    String[] groups = new String[] { "bayes.", "functions.", "lazy.", "meta.", "misc.", "rules.", "trees." };
    // Try each standard Weka package prefix until the configured classifier name resolves.
    for (int i = 0; i < groups.length; i++) {
        try {
            _cls = AbstractClassifier.forName(base + groups[i] + _config._classifier, null);
            break;
        } catch (Exception e) {
            if (i == groups.length - 1) {
                _logger.error("Could not create classifier - msg: " + e.getMessage(), e);
            }
        }
    }
}
From source file:de.unidue.langtech.grading.tc.LearningCurveTask.java
License:Open Source License
@Override
public void execute(TaskContext aContext) throws Exception {
    boolean multiLabel = false;

    for (Integer numberInstances : NUMBER_OF_TRAINING_INSTANCES) {
        for (int iteration = 0; iteration < ITERATIONS; iteration++) {
            File arffFileTrain = new File(
                    aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath()
                            + "/" + TRAINING_DATA_FILENAME);
            File arffFileTest = new File(
                    aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TEST_DATA, AccessMode.READONLY).getPath()
                            + "/" + TRAINING_DATA_FILENAME);

            Instances trainData = TaskUtils.getInstances(arffFileTrain, multiLabel);
            Instances testData = TaskUtils.getInstances(arffFileTest, multiLabel);

            if (numberInstances > trainData.size()) {
                continue;
            }

            Classifier cl = AbstractClassifier.forName(classificationArguments.get(0),
                    classificationArguments.subList(1, classificationArguments.size()).toArray(new String[0]));

            Instances copyTestData = new Instances(testData);
            trainData = WekaUtils.removeOutcomeId(trainData, multiLabel);
            testData = WekaUtils.removeOutcomeId(testData, multiLabel);

            Random generator = new Random();
            generator.setSeed(System.nanoTime());
            trainData.randomize(generator);

            // remove fraction of training data that should not be used for training
            for (int i = trainData.size() - 1; i >= numberInstances; i--) {
                trainData.delete(i);
            }

            // file to hold prediction results
            File evalOutput = new File(
                    aContext.getStorageLocation(TEST_TASK_OUTPUT_KEY, AccessMode.READWRITE).getPath() + "/"
                            + EVALUATION_DATA_FILENAME + "_" + numberInstances + "_" + iteration);

            // train the classifier on the train set split - not necessary in multilabel setup, but
            // in single label setup
            cl.buildClassifier(trainData);

            weka.core.SerializationHelper.write(evalOutput.getAbsolutePath(),
                    WekaUtils.getEvaluationSinglelabel(cl, trainData, testData));
            testData = WekaUtils.getPredictionInstancesSingleLabel(testData, cl);
            testData = WekaUtils.addOutcomeId(testData, copyTestData, false);

            // // Write out the predictions
            // DataSink.write(aContext.getStorageLocation(TEST_TASK_OUTPUT_KEY, AccessMode.READWRITE)
            //         .getAbsolutePath() + "/" + PREDICTIONS_FILENAME + "_" + trainPercent, testData);
        }
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    List<String> labelNames = ClassifierProvider.labelNames(Y);
    labelNames.stream().map(attr -> new Attribute(attr, Arrays.asList("y", "n")))
            .forEachOrdered(attributes::add);
    List<String> featureNames = ClassifierProvider.featureNames(X);
    featureNames.stream().map(Attribute::new).forEachOrdered(attributes::add);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, 0);
    datasetSchema.setClassIndex(labelNames.size());

    // add instances
    // due to the limitation of the interface definition, X, Y should be reorganized
    SetMultimap<Map<String, Double>, String> XY = HashMultimap.create();
    IntStream.range(0, X.size()).forEach(i -> XY.put(X.get(i), Y.get(i)));
    Instances trainingInstances = new Instances(datasetSchema, XY.size());
    for (Map.Entry<Map<String, Double>, Collection<String>> entry : XY.asMap().entrySet()) {
        Set<String> y = ImmutableSet.copyOf(entry.getValue());
        Map<String, Double> x = entry.getKey();
        SparseInstance instance = new SparseInstance(labelNames.size() + x.size());
        for (String labelName : labelNames) {
            instance.setValue(datasetSchema.attribute(labelName), y.contains(labelName) ? "y" : "n");
        }
        for (Map.Entry<String, Double> e : x.entrySet()) {
            instance.setValue(datasetSchema.attribute(e.getKey()), e.getValue());
        }
        trainingInstances.add(instance);
    }

    // training
    try {
        classifier = (MultiLabelClassifier) AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    ClassifierProvider.featureNames(X).stream().map(Attribute::new).forEachOrdered(attributes::add);
    Attribute label = new Attribute("__label__", ClassifierProvider.labelNames(Y));
    attributes.add(label);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, X.size());
    datasetSchema.setClass(label);

    // add instances
    Instances trainingInstances = new Instances(datasetSchema, X.size());
    if (balanceWeight) {
        Multiset<String> labelCounts = HashMultiset.create(Y);
        double maxCount = labelCounts.entrySet().stream().mapToInt(Multiset.Entry::getCount).max()
                .orElseThrow(AnalysisEngineProcessException::new);
        for (int i = 0; i < X.size(); i++) {
            String y = Y.get(i);
            double weight = maxCount / labelCounts.count(y);
            trainingInstances.add(newInstance(X.get(i), y, weight, trainingInstances));
        }
    } else {
        for (int i = 0; i < X.size(); i++) {
            trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
        }
    }

    // training
    try {
        classifier = AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // write model and dataset schema
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // backup training dataset as arff file
    if (datasetExportFile != null) {
        try {
            ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingInstances);
            saver.setFile(datasetExportFile);
            saver.writeBatch();
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:fk.stardust.localizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
/**
 * Builds and trains a classifier.
 *
 * @param name
 *            FQCN of the classifier
 * @param options
 *            options to pass to the classifier
 * @param trainingSet
 *            training set to build the classifier with
 * @return trained classifier
 */
public Classifier buildClassifier(final String name, final String[] options, final Instances trainingSet) {
    try {
        final Classifier classifier = AbstractClassifier.forName(this.classifierName, options);
        classifier.buildClassifier(trainingSet);
        return classifier;
    } catch (final Exception e1) { // NOCS: Weka throws only raw exceptions
        System.err.println("Unable to create classifier " + this.classifierName);
        throw new RuntimeException(e1);
    }
}
From source file:meka.classifiers.multilabel.CDN.java
License:Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    int N = D.numInstances();
    int L = D.classIndex();
    h = new Classifier[L];
    m_R = new Random(m_S);
    D_templates = new Instances[L];

    // Build L probabilistic models, each to predict Y_i | X, Y_{-y}; save the templates.
    for (int j = 0; j < L; j++) {
        // X = [Y[0],...,Y[j-1],Y[j+1],...,Y[L],X]
        D_templates[j] = new Instances(D);
        D_templates[j].setClassIndex(j);
        // train H[j] : X -> Y
        h[j] = AbstractClassifier.forName(getClassifier().getClass().getName(),
                ((AbstractClassifier) getClassifier()).getOptions());
        h[j].buildClassifier(D_templates[j]);
    }
}
From source file:meka.classifiers.multilabel.meta.MBR.java
License:Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);

    int c = data.classIndex();

    // Base BR
    if (getDebug())
        System.out.println("Build BR Base (" + c + " models)");
    m_BASE = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(),
            ((AbstractClassifier) getClassifier()).getOptions());
    m_BASE.buildClassifier(data);

    // Meta BR
    if (getDebug())
        System.out.println("Prepare Meta data ");
    Instances meta_data = new Instances(data);

    FastVector BinaryClass = new FastVector(c);
    BinaryClass.addElement("0");
    BinaryClass.addElement("1");

    for (int i = 0; i < c; i++) {
        meta_data.insertAttributeAt(new Attribute("metaclass" + i, BinaryClass), c);
    }

    for (int i = 0; i < data.numInstances(); i++) {
        double cfn[] = m_BASE.distributionForInstance(data.instance(i));
        for (int a = 0; a < cfn.length; a++) {
            meta_data.instance(i).setValue(a + c, cfn[a]);
        }
    }

    meta_data.setClassIndex(c);
    m_InstancesTemplate = new Instances(meta_data, 0);

    if (getDebug())
        System.out.println("Build BR Meta (" + c + " models)");

    m_META = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(),
            ((AbstractClassifier) getClassifier()).getOptions());
    m_META.buildClassifier(meta_data);
}
From source file:news.classifier.WekaLearner.java
public void setClassifier(String name, String[] options) throws Exception {
    wClassifier = AbstractClassifier.forName(name, options);
}
From source file:org.knime.knip.suise.node.pixclassmodel.SettingsModelWekaClassifier.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
protected void loadSettingsForModel(NodeSettingsRO settings) throws InvalidSettingsException {
    String name = settings.getString(m_configName + KEY_CLASSIFIER_NAME);
    String[] options = settings.getStringArray(m_configName + KEY_CLASSIFIER_OPTIONS);
    try {
        m_wekaClassifier = (AbstractClassifier) AbstractClassifier.forName(name, options);
    } catch (Exception e) {
        throw new InvalidSettingsException(e);
    }
}