Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

In this page you can find the example usage for weka.core Instances numAttributes.

Prototype


public int numAttributes()

Source Link

Document

Returns the number of attributes.

Usage

From source file:etc.aloe.oilspill2010.BigramFeatureGenerationImpl.java

/**
 * Configures the bigram feature set over a copy of the given examples and
 * reports how many attributes were generated.
 *
 * @param basicExamples the examples to configure features over; not modified
 *                      (a copy is taken before any filter is applied)
 * @return the populated FeatureSpecification; if filter construction fails,
 *         the partially-populated specification is still returned
 */
@Override
public FeatureSpecification generateFeatures(ExampleSet basicExamples) {

    // Work on a copy so the caller's example set is untouched by the filters.
    ExampleSet examples = basicExamples.copy();
    FeatureSpecification spec = new FeatureSpecification();

    System.out.print("Configuring features over " + examples.size() + " examples... ");

    try {
        spec.addFilter(getPronounsFilter(examples));
        spec.addFilter(getPunctuationFilter(examples));
        spec.addFilter(getSpecialWordsFilter(examples));
        spec.addFilter(getSpellingFilter(examples));

        spec.addFilter(getEmoticonsFilter(examples));
        spec.addFilter(getUnigramBigramFilter(examples));
        spec.addFilter(getParticipantsFilter(examples));
        spec.addFilter(getRemoveIDFilter(examples));
        spec.addFilter(getRemoveMessageFilter(examples));
        //spec.addFilter(getSparseToNonsparseFilter(examples));
        //spec.addFilter(getFeatureSelectionFilter(examples));

        // Report attribute count minus one: the class attribute is not a feature.
        Instances output = spec.getOutputFormat();
        int numAttrs = output.numAttributes();
        System.out.println("generated " + (numAttrs - 1) + " features.");
    } catch (Exception e) {
        System.err.println("Error generating features.");
        // FIX: the original printed only e.getMessage(), which may be null and
        // loses the stack trace; print the full trace for diagnosis.
        e.printStackTrace();
    }

    return spec;
}

From source file:etc.aloe.oilspill2010.FeatureGenerationImpl.java

License:Open Source License

/**
 * Configures the bag-of-words feature set over a copy of the given examples,
 * optionally including participant features, and reports how many attributes
 * were generated.
 *
 * @param basicExamples the examples to configure features over; not modified
 *                      (a copy is taken before any filter is applied)
 * @return the populated FeatureSpecification; if filter construction fails,
 *         the partially-populated specification is still returned
 */
@Override
public FeatureSpecification generateFeatures(ExampleSet basicExamples) {

    // Work on a copy so the caller's example set is untouched by the filters.
    ExampleSet examples = basicExamples.copy();
    FeatureSpecification spec = new FeatureSpecification();

    System.out.print("Configuring features over " + examples.size() + " examples... ");

    try {
        spec.addFilter(getPronounsFilter(examples));
        spec.addFilter(getPunctuationFilter(examples));
        spec.addFilter(getSpecialWordsFilter(examples));
        spec.addFilter(getSpellingFilter(examples));

        spec.addFilter(getEmoticonsFilter(examples));
        spec.addFilter(getBagOfWordsFilter(examples));

        // Participant features are opt-in; otherwise strip the participant
        // attribute entirely.
        if (this.getParticipantFeatureCount() > 0) {
            spec.addFilter(getParticipantsFilter(examples));
        } else {
            spec.addFilter(getRemoveParticipantFilter(examples));
        }

        spec.addFilter(getRemoveIDFilter(examples));
        //spec.addFilter(getSparseToNonsparseFilter(examples));
        //spec.addFilter(getFeatureSelectionFilter(examples));

        // Report attribute count minus one: the class attribute is not a feature.
        Instances output = spec.getOutputFormat();
        int numAttrs = output.numAttributes();
        System.out.println("generated " + (numAttrs - 1) + " features.");
    } catch (Exception e) {
        System.err.println("Error generating features.");
        // FIX: the original printed only e.getMessage(), which may be null and
        // loses the stack trace; print the full trace for diagnosis.
        e.printStackTrace();
    }

    return spec;
}

From source file:eu.linda.analytics.formats.ArffInputFormat.java

/**
 * Loads an ARFF file into a weka Instances object and marks the last
 * attribute as the class attribute.
 *
 * @param pathToFile     path of the ARFF file to read
 * @param isForRDFOutput unused for ARFF input (kept for interface compatibility)
 * @param analytics      unused for ARFF input (kept for interface compatibility)
 * @return the loaded Instances, or null if reading failed (the error is logged)
 */
@Override
public AbstractList importData4weka(String pathToFile, boolean isForRDFOutput, Analytics analytics) {

    helpfulFuncions.nicePrintMessage("import Arff file " + pathToFile);

    Instances data = null;
    try {
        data = ConverterUtils.DataSource.read(pathToFile);

        // Convention throughout this package: the last attribute is the class.
        data.setClassIndex(data.numAttributes() - 1);

    } catch (Exception ex) {
        Logger.getLogger(ArffInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }
    // May be null on failure; callers must handle that.
    return data;

}

From source file:eu.linda.analytics.formats.CSVInputFormat.java

/**
 * Loads a CSV file into a weka Instances object, marks the last attribute as
 * the class, and records the input size and load time on the analytics record.
 *
 * @param pathToFile     path of the CSV file to read
 * @param isForRDFOutput if true, the first two columns are loaded as strings
 *                       (they carry metadata used for RDF output)
 * @param analytics      accumulator for data-size and timing metrics; updated
 *                       and persisted via the connection controller
 * @return the loaded Instances, or null if reading failed (the error is logged)
 */
@Override
public AbstractList importData4weka(String pathToFile, boolean isForRDFOutput, Analytics analytics) {

    float timeToGetQuery = 0;
    long startTimeToGetQuery = System.currentTimeMillis();
    helpfulFuncions.nicePrintMessage("import CSV file ");

    System.out.println("Import data from file: " + pathToFile);

    Instances data = null;
    try {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(pathToFile));
        if (isForRDFOutput) {
            loader.setStringAttributes("1,2");
        }

        loader.setFieldSeparator(",");
        data = loader.getDataSet();
        // Convention throughout this package: the last attribute is the class.
        data.setClassIndex(data.numAttributes() - 1);

        // FIX: try-with-resources. The original closed fis in a finally block
        // without a null check, so a failed FileInputStream constructor would
        // raise NullPointerException from the finally clause.
        try (FileInputStream fis = new FileInputStream(pathToFile)) {
            System.out.println("fis.getChannel().size() " + fis.getChannel().size());
            analytics.setData_size(analytics.getData_size() + fis.getChannel().size());
        }

        // Get elapsed time in milliseconds
        long elapsedTimeToGetQueryMillis = System.currentTimeMillis() - startTimeToGetQuery;
        // Get elapsed time in seconds
        timeToGetQuery = elapsedTimeToGetQueryMillis / 1000F;
        analytics.setTimeToGet_data(analytics.getTimeToGet_data() + timeToGetQuery);
        System.out.println("timeToGetQuery" + timeToGetQuery);

        connectionController.updateLindaAnalyticsInputDataPerformanceTime(analytics);

    } catch (Exception ex) {
        // FIX: log under this class — the original logged under
        // ArffInputFormat.class (copy-paste error).
        Logger.getLogger(CSVInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }
    // May be null on failure; callers must handle that.
    return data;

}

From source file:eu.linda.analytics.formats.CSVInputFormat.java

/**
 * Ad-hoc smoke test: loads a hard-coded CSV file with the first two columns
 * as strings and sets the last attribute as the class.
 *
 * @param args unused
 * @throws Exception never thrown in practice; all loader failures are caught
 *                   and logged below
 */
public static void main(String[] args) throws Exception {
    Instances data = null;

    CSVLoader loader = new CSVLoader();
    try {
        loader.setSource(new File("/home/eleni/Desktop/mydatasets/NYRandonResearchTotest2.csv"));

        // Keep the two metadata columns as string attributes.
        loader.setStringAttributes("1,2");
        loader.setFieldSeparator(",");

        data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

    } catch (Exception ex) {
        // FIX: the original had a separate IOException catch identical to this
        // one — redundant, since IOException is an Exception. Also removed the
        // unused "-S"/"1,2" options array that was never passed anywhere.
        Logger.getLogger(CSVInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:eu.linda.analytics.formats.ForecastingRDFGenerator.java

/**
 * Builds an RDF model describing one forecasting analytics run: the analytic
 * process (a PROV activity), the software and user agents behind it, and one
 * result node per instance linking the predicted value back to its input.
 *
 * @param analytics    metadata for the run (id, version, user, query ids)
 * @param dataToExport the weka Instances holding the predictions; the last
 *                     attribute is taken as the predicted value
 * @return the populated Jena model
 */
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFunctions.nicePrintMessage("Generate Forecasting RDFModel for weka algorithms ");

    // The named-graph context encodes analytics id, next version, and today's date.
    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);
    String base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/myRepository/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();
    // NS already ends with '#'; local names are appended directly below.
    String NS = base + datasetContextToString + "#";

    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an", Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties.
    // FIX: NS already ends with "#"; the original appended another "#" to the
    // first two properties, producing "...##analyzedField" URIs — inconsistent
    // with GeneralRDFGenerator and with the other properties in this method.
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource linda_user = model.createResource(analytics_NS + "User");

    // Describe the analytic process itself (a PROV activity).
    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + (analytics.getVersion() + 1));
    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    // The analyzed field is the name of the last (class/predicted) attribute.
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "linda analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    // Link back to the train/evaluation datasets only when they are RDF queries.
    if (helpfulFunctions.isRDFInputFormat(analytics.getTrainQuery_id())) {

        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);

    }

    if (helpfulFunctions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {

        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);

    }

    // Agents: the LinDa user and the analytics software acting on their behalf.
    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);

    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);

    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each triplet, create a resource representing the sentence, as well as the subject,
    // predicate, and object, and then add the triples to the model.
    // NOTE(review): the loop starts at i = 1, skipping instance 0 — presumably
    // the first row carries header/metadata; confirm this is intentional.
    for (int i = 1; i < triplets.size(); i++) {

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);

        // Attribute 1 holds the URI of the input the prediction derives from.
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(1));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));
    }
    return model;

}

From source file:eu.linda.analytics.formats.GeneralRDFGenerator.java

/**
 * Builds an RDF model describing one general analytics run: the analytic
 * process (a PROV activity) with its performance metrics, the software and
 * user agents behind it, and one result node per instance linking the
 * predicted value back to its input.
 *
 * @param analytics    metadata for the run (id, version, user, query ids,
 *                     timing and data-size metrics)
 * @param dataToExport the weka Instances holding the results; the last
 *                     attribute is taken as the predicted value
 * @return the populated Jena model
 */
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFuncions.nicePrintMessage("Generate General RDFModel for weka algorithms ");

    // The named-graph context encodes analytics id, next version, and today's date.
    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);
    String base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();
    // NS already ends with '#'; local names are appended directly below.
    String NS = base + datasetContextToString + "#";

    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an", Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");
    Property dataSizeOfAnalyzedDataProperty = model.createProperty(NS + "dataSizeOfAnalyzedDatainBytes");
    Property timeToGetDataProperty = model.createProperty(NS + "timeToGetDataInSecs");
    Property timeToRunAnalyticsProcessProperty = model.createProperty(NS + "timeToRunAnalyticsProcessInSecs");
    Property timeToCreateRDFOutPutProperty = model.createProperty(NS + "timeToCreateRDFOutPutInSecs");
    Property performanceProperty = model.createProperty(NS + "hasPerformance");
    Property atTime = model.createProperty("http://www.w3.org/ns/prov#atTime");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);
    Resource linda_user = model.createResource(analytics_NS + "User");
    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource performance = model.createResource(analytics_NS + "performance");
    Resource performance_statement = model
            .createResource(analytics_NS + "performance/" + analytics.getId() + "/" + analytics.getVersion());

    // Describe the analytic process itself (a PROV activity).
    // NOTE(review): unlike ForecastingRDFGenerator, the process URI here uses
    // getVersion() rather than getVersion() + 1 — confirm which is intended.
    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + analytics.getVersion());
    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    // The analyzed field is the name of the last (class/predicted) attribute.
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "Linda Analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    // Timestamp the activity with the current time as a typed calendar literal.
    Calendar cal = GregorianCalendar.getInstance();
    Literal value = model.createTypedLiteral(cal);
    analytic_process_statement.addProperty(atTime, value);

    // Attach the recorded performance metrics (all serialized as strings).
    performance_statement.addProperty(RDF.type, performance);
    performance_statement.addProperty(dataSizeOfAnalyzedDataProperty, Float.toString(analytics.getData_size()));
    performance_statement.addProperty(timeToGetDataProperty, Float.toString(analytics.getTimeToGet_data()));
    performance_statement.addProperty(timeToRunAnalyticsProcessProperty,
            Float.toString(analytics.getTimeToRun_analytics()));
    performance_statement.addProperty(timeToCreateRDFOutPutProperty,
            Float.toString(analytics.getTimeToCreate_RDF()));
    analytic_process_statement.addProperty(performanceProperty, performance_statement);

    // Link back to the train/evaluation datasets only when they are RDF queries.
    if (helpfulFuncions.isRDFInputFormat(analytics.getTrainQuery_id())) {

        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);

    }

    if (helpfulFuncions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {

        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);

    }

    // Agents: the LinDa user and the analytics software acting on their behalf.
    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);

    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);

    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each triplet, create a resource representing the sentence, as well as the subject,
    // predicate, and object, and then add the triples to the model.
    // NOTE(review): the loop starts at i = 1, skipping instance 0 — presumably
    // the first row carries header/metadata; confirm this is intentional.
    for (int i = 1; i < triplets.size(); i++) {
        //for (Instance triplet : triplets) {
        // Attribute 0 holds the URI of the input the result derives from.
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(0));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));

    }

    return model;

}

From source file:eu.linda.analytics.formats.RDFInputFormat.java

/**
 * Resolves a LinDa query id to its result URI, downloads the CSV result to a
 * temporary file, loads it into a weka Instances object (last attribute as
 * class), and records data-size and load-time metrics on the analytics record.
 *
 * @param query_id       id of the LinDa query whose results to import
 * @param isForRDFOutput if true, the first two columns are loaded as strings
 *                       (they carry metadata used for RDF output)
 * @param analytics      accumulator for data-size and timing metrics; updated
 *                       and persisted via the connection controller
 * @return the loaded Instances; null if the query URL is unresponsive or
 *         loading failed (failures are logged)
 */
@Override
public AbstractList importData4weka(String query_id, boolean isForRDFOutput, Analytics analytics) {

    String queryURI = connectionController.getQueryURI(query_id);

    helpfulFunctions.nicePrintMessage("import data from uri " + queryURI);

    Instances data = null;
    try {
        float timeToGetQuery = 0;
        long startTimeToGetQuery = System.currentTimeMillis();
        URL url = new URL(queryURI);
        if (!helpfulFunctions.isURLResponsive(url)) {
            return null;
        }
        // Download the query result to a temp file before handing it to weka.
        File tmpfile4lindaquery = File.createTempFile("tmpfile4lindaquery" + query_id, ".tmp");
        FileUtils.copyURLToFile(url, tmpfile4lindaquery);

        System.out.println("Downloaded File Query: " + tmpfile4lindaquery);

        CSVLoader loader = new CSVLoader();
        loader.setSource(tmpfile4lindaquery);
        if (isForRDFOutput) {
            loader.setStringAttributes("1,2");
        }

        loader.setFieldSeparator(",");
        data = loader.getDataSet();
        // Convention throughout this package: the last attribute is the class.
        data.setClassIndex(data.numAttributes() - 1);

        // FIX: try-with-resources. The original closed fis in a finally block
        // without a null check, so a failed FileInputStream constructor would
        // raise NullPointerException from the finally clause.
        try (FileInputStream fis = new FileInputStream(tmpfile4lindaquery)) {
            System.out.println("fis.getChannel().size() " + fis.getChannel().size());
            analytics.setData_size(analytics.getData_size() + fis.getChannel().size());
        }

        // Get elapsed time in milliseconds
        long elapsedTimeToGetQueryMillis = System.currentTimeMillis() - startTimeToGetQuery;
        // Get elapsed time in seconds
        timeToGetQuery = elapsedTimeToGetQueryMillis / 1000F;
        analytics.setTimeToGet_data(analytics.getTimeToGet_data() + timeToGetQuery);
        System.out.println("timeToGetQuery" + timeToGetQuery);

        connectionController.updateLindaAnalyticsInputDataPerformanceTime(analytics);

    } catch (Exception ex) {
        // FIX: log under this class — the original logged under
        // ArffInputFormat.class (copy-paste error).
        Logger.getLogger(RDFInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }
    // May be null on failure; callers must handle that.
    return data;

}

From source file:examples.Pair.java

License:Open Source License

/**
 * Demonstrates building and evaluating a MultiStageCascading classifier on a
 * dataset supplied on the command line.
 *
 * @param args the command line arguments; args[0] must be the dataset path
 * @throws Exception if loading, training, or evaluation fails
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.out.println("Requires path to the dataset as the first and only argument");
        return;
    }

    final String datasetPath = args[0];

    // Assemble the cascade: a J48 tree followed by an NBTree, with a 3-NN
    // classifier as the fallback for anything the stages leave unclassified.
    MultiStageCascading cascade = new MultiStageCascading();
    cascade.setClassifiers(new Classifier[] { new J48(), new NBTree() });
    cascade.setDebug(true);
    cascade.setLastClassifier(new IBk(3));
    // Stage confidence thresholds: 0.95 for the first, 0.97 for the second.
    cascade.setConfidenceThresholds("0.95,0.97");
    // Each stage trains on a random 80% sample of the training instances.
    cascade.setPercentTrainingInstances(0.8);

    // Load the dataset; the last attribute is treated as the class.
    Instances dataset = DataSource.read(datasetPath);
    dataset.setClassIndex(dataset.numAttributes() - 1);

    // Split 70/30 into training and test sets.
    Pair<Instances, Instances> split = seprateTestAndTrainingSets(dataset, 0.7);
    Instances train = split.getFirst();
    Instances test = split.getSecond();

    cascade.buildClassifier(train);

    // Evaluate the trained cascade on the held-out test set.
    Evaluation evaluation = new Evaluation(train);
    evaluation.evaluateModel(cascade, test);
    System.out.println(evaluation.toSummaryString("\nResults\n\n", false));
}

From source file:examples.TrainerFrame.java

/**
 * Handler for the Train button: extracts features for the target and
 * non-target sound directories, merges them into one dataset labelled with a
 * new nominal "NewNominal" attribute ("target"/"other"), saves the merged
 * ARFF file, trains an unpruned J48 tree, and serializes the classifier.
 * Progress is reported through pBar (0..7) and jLabelTrainerStatus.
 */
private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    //This is a temporary fix to make it appear like its finished

    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");
    //Generate Target Features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    //Generate Non-targe features Features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    //Load Target Arrf File
    // NOTE(review): if getFeatures failed above, featuresTarget is null and
    // the FileReader below throws; the catch only logs, leaving dataTarget
    // null, which NPEs at "new Instances(dataTarget)" further down — confirm
    // whether the handler should abort instead.
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(3);
    pBar.repaint();
    //Load Other Arrf File
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    // Copy the target data and append a nominal class attribute with the two
    // labels; every target instance is labelled "target".
    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector() {
    };
    typeList.add("target");
    typeList.add("other");
    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    // Give the "other" data the same class attribute, label each instance
    // "other", and merge it into the combined dataset.
    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    // The appended nominal attribute (last) is the class attribute.
    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();
    // Persist the merged training set as an ARFF file.
    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(6);
    pBar.repaint();
    //Train a classifier
    // "-U" builds an unpruned J48 tree.
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    // Serialize the trained tree and register it in the global classifier list.
    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);
    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");

}