Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


public int classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:old.CFS.java

/**
 * Entry point: loads a hard-coded ARFF dataset, sets its class attribute,
 * and runs the low-level CFS feature-selection demo on it.
 *
 * @param args        the commandline arguments (currently unused; the input
 *                    path is hard-coded below)
 * @throws Exception  if something goes wrong
 */
public static void main(String[] args) throws Exception {
    // load data
    System.out.println("\n0. Loading data");
    // NOTE(review): hard-coded Windows path — consider reading it from args.
    DataSource source = new DataSource("D:\\ALL\\imdb_grid_size=1000_MIN=50_genres=5.arff");
    Instances data = source.getDataSet();
    // NOTE(review): if the dataset has no "Horror" attribute, attribute(...)
    // returns null and setClass will fail before the fallback below — confirm.
    data.setClass(data.attribute("Horror"));

    // Fallback: default the class to the last attribute when none is set.
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);

    //    // 1. meta-classifier
    //    useClassifier(data);
    //
    //    // 2. filter
    //    useFilter(data);

    // 3. low-level
    useLowLevel(data);
}

From source file:org.esa.nest.gpf.SGD.java

/**
 * Method for building the classifier.
 *
 * Preprocessing pipeline (order matters): drop instances with a missing
 * class, replace missing attribute values, binarize nominal attributes,
 * then normalize — each step feeds the next via Filter.useFilter.
 *
 * @param data the set of training instances.
 * @throws Exception if the classifier can't be built successfully.
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    reset();

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Work on a copy so the caller's Instances object is not mutated.
    data = new Instances(data);
    data.deleteWithMissingClass();

    if (data.numInstances() > 0 && !m_dontReplaceMissing) {
        m_replaceMissing = new ReplaceMissingValues();
        m_replaceMissing.setInputFormat(data);
        data = Filter.useFilter(data, m_replaceMissing);
    }

    // check for only numeric attributes (the class attribute is exempt)
    boolean onlyNumeric = true;
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != data.classIndex()) {
            if (!data.attribute(i).isNumeric()) {
                onlyNumeric = false;
                break;
            }
        }
    }

    if (!onlyNumeric) {
        // Supervised NominalToBinary needs class information, so it is only
        // usable when training instances exist; otherwise fall back to the
        // unsupervised variant.
        if (data.numInstances() > 0) {
            m_nominalToBinary = new weka.filters.supervised.attribute.NominalToBinary();
        } else {
            m_nominalToBinary = new weka.filters.unsupervised.attribute.NominalToBinary();
        }
        m_nominalToBinary.setInputFormat(data);
        data = Filter.useFilter(data, m_nominalToBinary);
    }

    if (!m_dontNormalize && data.numInstances() > 0) {

        m_normalize = new Normalize();
        m_normalize.setInputFormat(data);
        data = Filter.useFilter(data, m_normalize);
    }

    m_numInstances = data.numInstances();

    // One weight per attribute plus the bias term.
    m_weights = new double[data.numAttributes() + 1];
    // Keep an empty header copy of the (filtered) data format.
    m_data = new Instances(data, 0);

    if (data.numInstances() > 0) {
        data.randomize(new Random(getSeed())); // randomize the data
        train(data);
    }
}

From source file:org.ml.classifier.TextDirectoryToArff.java

License:Open Source License

/**
 * Entry point: loads the test documents, deserializes a previously trained
 * {@link FilteredClassifier} from {@code MODEL}, and classifies a fixed set
 * of test instances, printing the actual label, the predicted class value,
 * and the per-instance prediction time.
 *
 * @param args the commandline arguments (unused; paths come from the
 *             TESTING_FILES and MODEL constants)
 */
public static void main(String[] args) {

    TextDirectoryToArff tdta = new TextDirectoryToArff();
    try {
        Instances testData = tdta.createDataset(TESTING_FILES);

        // Read the trained model from the file. The filter (e.g. a
        // StringToWordVector used at training time) is embedded in the
        // FilteredClassifier, so raw test instances can be passed directly.
        FilteredClassifier fcl = (FilteredClassifier) SerializationHelper.read(MODEL);

        // Spot-check a fixed sample of test instances.
        int[] myLst = { 5, 7, 9, 100, 345, 1000, 1500, 7500 };

        for (int idx : myLst) {
            System.out.println("Actual: " + testData.instance(idx).stringValue(testData.classIndex()));
            long start = System.currentTimeMillis();
            System.out.println(fcl.classifyInstance(testData.instance(idx)));
            long end = System.currentTimeMillis();
            System.out.println("\n Time: " + (end - start) + " ms");
        }

    } catch (Exception e) {
        LOGGER.error(e.getMessage());
        e.printStackTrace();
    }
}

From source file:org.openml.webapplication.fantail.dc.DCUntils.java

License:Open Source License

/**
 * Computes the entropy of every nominal, non-class attribute of the dataset.
 *
 * @param data the instances to analyse
 * @return one entropy value (in bits) per nominal non-class attribute, in
 *         attribute order
 */
public static double[] computeAttributeEntropy(Instances data) {
    List<Double> attributeEntropy = new ArrayList<Double>();
    for (int attIndex = 0; attIndex < data.numAttributes(); attIndex++) {

        if (data.attribute(attIndex).isNominal() && (data.classIndex() != attIndex)) {
            // Size the counts array by the number of DECLARED values: instance
            // values are indices into the attribute's declared value list, which
            // may exceed numDistinctValues (the values actually present in the
            // data) and would previously overrun the array.
            double[] attValueCounts = new double[data.attribute(attIndex).numValues()];

            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                // Skip missing values: (int) of a missing value (NaN) is 0 and
                // would be silently miscounted as the first nominal value.
                if (!inst.isMissing(attIndex)) {
                    attValueCounts[(int) inst.value(attIndex)]++;
                }
            }
            double attEntropy = 0;
            for (int c = 0; c < data.attribute(attIndex).numValues(); c++) {
                if (attValueCounts[c] > 0) {
                    double prob_c = attValueCounts[c] / data.numInstances();
                    attEntropy += prob_c * (Utils.log2(prob_c));
                }
            }
            attEntropy = attEntropy * -1.0;
            attributeEntropy.add(attEntropy);
        }
    }
    return ArrayUtils.toPrimitive(attributeEntropy.toArray(new Double[attributeEntropy.size()]));
}

From source file:org.openml.webapplication.fantail.dc.DCUntils.java

License:Open Source License

/**
 * Computes the mutual information (information gain) between the class and
 * every nominal, non-class attribute of the dataset.
 *
 * @param data the instances to analyse
 * @return one mutual-information value per nominal non-class attribute, in
 *         attribute order, rounded to 14 decimal places
 */
public static double[] computeMutualInformation(Instances data) {
    // Rounding factor: keep 14 decimal places to suppress floating-point noise.
    final double scale = Math.pow(10, 14);
    List<Double> mutualInformation = new ArrayList<Double>();

    for (int att = 0; att < data.numAttributes(); att++) {
        boolean isNominal = data.attribute(att).isNominal();
        boolean isNotClass = data.classIndex() != att;
        if (isNominal && isNotClass) {
            double infoGain = computeInfoGain(data, data.attribute(att));
            mutualInformation.add(Math.round(infoGain * scale) / scale);
        }
    }
    return ArrayUtils.toPrimitive(mutualInformation.toArray(new Double[mutualInformation.size()]));
}

From source file:org.openml.webapplication.fantail.dc.statistical.AttributeEntropy.java

License:Open Source License

/**
 * Computes entropy-based dataset qualities (class entropy, attribute
 * entropy, mutual information and derived statistics) and returns them
 * keyed by the quality ids declared in {@code ids}.
 *
 * For a numeric class every quality is reported as -1.0.
 */
@Override
public Map<String, Double> characterize(Instances data) {
    // Count nominal non-class attributes; used to decide whether a mean
    // attribute entropy is meaningful.
    int nominal_count = 0;
    for (int i = 0; i < data.numAttributes(); ++i) {
        if (data.attribute(i).isNominal() && data.classIndex() != i) {
            nominal_count += 1;
        }
    }

    Map<String, Double> qualities = new HashMap<String, Double>();
    if (data.classAttribute().isNominal()) {
        double classEntropy = DCUntils.computeClassEntropy(data);
        double[] attEntropy = DCUntils.computeAttributeEntropy(data);
        double[] mutualInformation = DCUntils.computeMutualInformation(data);

        double meanMI = StatUtils.mean(mutualInformation);
        double meanAttEntropy = nominal_count > 0 ? StatUtils.mean(attEntropy) : -1;

        double noiseSignalRatio;
        double ena = 0;

        // Guard against division by a non-positive mean mutual information:
        // report -1 (undefined) for the equivalent number of attributes and
        // the noise/signal ratio in that case.
        if (meanMI <= 0) {
            ena = -1;
            noiseSignalRatio = -1;
        } else {
            ena = classEntropy / meanMI;
            noiseSignalRatio = (meanAttEntropy - meanMI) / meanMI;
        }

        qualities.put(ids[0], classEntropy);
        qualities.put(ids[1], meanAttEntropy);
        qualities.put(ids[2], meanMI);
        qualities.put(ids[3], ena);
        qualities.put(ids[4], noiseSignalRatio);

        // NOTE(review): when there are no nominal non-class attributes the
        // arrays below are empty; only the mean above is guarded by
        // nominal_count — confirm StatUtils.min/max/percentile behave as
        // intended on empty arrays here.
        qualities.put(ids[5], StatUtils.min(attEntropy));
        qualities.put(ids[6], StatUtils.min(mutualInformation));

        qualities.put(ids[7], StatUtils.max(attEntropy));
        qualities.put(ids[8], StatUtils.max(mutualInformation));

        qualities.put(ids[9], StatUtils.percentile(attEntropy, 25));
        qualities.put(ids[10], StatUtils.percentile(mutualInformation, 25));

        qualities.put(ids[11], StatUtils.percentile(attEntropy, 50));
        qualities.put(ids[12], StatUtils.percentile(mutualInformation, 50));

        qualities.put(ids[13], StatUtils.percentile(attEntropy, 75));
        qualities.put(ids[14], StatUtils.percentile(mutualInformation, 75));
    } else { // numeric target
        for (int i = 0; i < ids.length; ++i) {
            qualities.put(ids[i], -1.0);
        }
    }
    return qualities;
}

From source file:org.openml.webapplication.features.ExtractFeatures.java

License:Open Source License

/**
 * Extracts per-attribute feature metadata from a dataset: type, counts
 * (distinct/unique/missing/integer/real/nominal values) and, for numeric
 * attributes, min/max/mean/standard deviation.
 *
 * @param dataset      the instances to describe (its class index is mutated
 *                     by this method)
 * @param defaultClass name of the attribute to use as the class; when null
 *                     the last attribute is used instead
 * @return one Feature per attribute, in attribute order
 */
public static List<Feature> getFeatures(Instances dataset, String defaultClass) {
    if (defaultClass != null) {
        // NOTE(review): attribute(defaultClass) returns null for an unknown
        // name, which makes setClass fail — confirm callers validate the name.
        dataset.setClass(dataset.attribute(defaultClass));
    } else {
        dataset.setClassIndex(dataset.numAttributes() - 1);
    }

    final ArrayList<Feature> resultFeatures = new ArrayList<Feature>();

    // Loop-invariant: number of class values (0 for a non-nominal class).
    final int numClassValues = dataset.classAttribute().isNominal() ? dataset.classAttribute().numValues() : 0;

    for (int i = 0; i < dataset.numAttributes(); i++) {
        Attribute att = dataset.attribute(i);
        AttributeStatistics attributeStats = new AttributeStatistics(dataset.attribute(i), numClassValues);

        for (int j = 0; j < dataset.numInstances(); ++j) {
            attributeStats.addValue(dataset.get(j).value(i), dataset.get(j).classValue());
        }

        AttributeStats as = dataset.attributeStats(i);

        Integer numberOfDistinctValues = as.distinctCount;
        Integer numberOfUniqueValues = as.uniqueCount;
        // (the original assigned missingCount twice; once is enough)
        Integer numberOfMissingValues = as.missingCount;
        Integer numberOfIntegerValues = as.intCount;
        Integer numberOfRealValues = as.realCount;

        Integer numberOfNominalValues = att.isNominal() ? (Integer) att.numValues() : null;
        Integer numberOfValues = attributeStats.getTotalObservations();

        Double maximumValue = null;
        Double minimumValue = null;
        Double meanValue = null;
        Double standardDeviation = null;

        if (att.isNumeric()) {
            maximumValue = attributeStats.getMaximum();
            minimumValue = attributeStats.getMinimum();
            meanValue = attributeStats.getMean();
            standardDeviation = 0.0;
            try {
                standardDeviation = attributeStats.getStandardDeviation();
            } catch (Exception e) {
                // Best effort: keep 0.0 and log, rather than failing the whole
                // feature extraction for one attribute.
                Conversion.log("WARNING", "StdDev", "Could not compute standard deviation of feature "
                        + att.name() + ": " + e.getMessage());
            }
        }

        // Map Weka's attribute type codes to OpenML data-type names.
        String data_type;
        switch (att.type()) {
        case 0:
            data_type = "numeric";
            break;
        case 1:
            data_type = "nominal";
            break;
        case 2:
            data_type = "string";
            break;
        default:
            data_type = "unknown";
            break;
        }

        resultFeatures.add(new Feature(att.index(), att.name(), data_type, att.index() == dataset.classIndex(),
                numberOfDistinctValues, numberOfUniqueValues, numberOfMissingValues, numberOfIntegerValues,
                numberOfRealValues, numberOfNominalValues, numberOfValues, maximumValue, minimumValue,
                meanValue, standardDeviation, attributeStats.getClassDistribution()));
    }
    return resultFeatures;
}

From source file:org.openml.webapplication.features.FantailConnector.java

License:Open Source License

/**
 * Runs every batch characterizer over the dataset (or over one interval of
 * it) and returns the resulting qualities.
 *
 * @param fulldata           the complete dataset
 * @param start              index of the first instance of the interval
 * @param interval_size      number of instances in the interval, or null to
 *                           characterize the full dataset
 * @param qualitiesAvailable qualities already in the database; characterizers
 *                           whose ids are all present are skipped (null
 *                           disables the skip check for no characterizer)
 * @throws Exception if a characterizer or filter fails
 */
private List<Quality> datasetCharacteristics(Instances fulldata, Integer start, Integer interval_size,
        List<String> qualitiesAvailable) throws Exception {
    List<Quality> result = new ArrayList<DataQuality.Quality>();
    Instances intervalData;

    // Be careful changing this!
    if (interval_size != null) {
        // Clamp the interval so the last window never reads past the end.
        intervalData = new Instances(fulldata, start, Math.min(interval_size, fulldata.numInstances() - start));
        intervalData = applyFilter(intervalData, new StringToNominal(), "-R first-last");
        // The filter produces a new header; restore the original class index.
        intervalData.setClassIndex(fulldata.classIndex());
    } else {
        intervalData = fulldata;
        // todo: use StringToNominal filter? might be to expensive
    }

    for (Characterizer dc : batchCharacterizers) {
        // Only run a characterizer if at least one of its qualities is missing
        // from the database (or if no availability list was given).
        if (qualitiesAvailable != null && qualitiesAvailable.containsAll(Arrays.asList(dc.getIDs())) == false) {
            Conversion.log("OK", "Extract Batch Features",
                    dc.getClass().getName() + ": " + Arrays.toString(dc.getIDs()));
            Map<String, Double> qualities = dc.characterize(intervalData);
            result.addAll(hashMaptoList(qualities, start, interval_size));
        } else {
            Conversion.log("OK", "Extract Batch Features", dc.getClass().getName() + " - already in database");
        }
    }
    return result;
}

From source file:org.openscience.cdk.applications.taverna.io.ARFFFileReaderActivity.java

License:Open Source License

/**
 * Reads every ARFF file supplied on the first input port into a Weka
 * Instances object and emits the list of datasets on the first output port.
 * Files that fail to parse are logged and skipped.
 */
@Override
public void work() throws Exception {
    // Get input
    List<File> inputFiles = this.getInputAsFileList(this.INPUT_PORTS[0]);
    // Do work
    List<Instances> datasets = new LinkedList<Instances>();
    for (File arffFile : inputFiles) {
        try {
            Instances loaded = DataSource.read(arffFile.getPath());
            int lastAttr = loaded.numAttributes() - 1;
            // Default the class to the last attribute, but only when it is
            // literally named "Class" and no class is set yet.
            boolean noClassSet = loaded.classIndex() == -1;
            if (noClassSet && "Class".equals(loaded.attribute(lastAttr).name())) {
                loaded.setClassIndex(lastAttr);
            }
            datasets.add(loaded);
        } catch (Exception e) {
            ErrorLogger.getInstance().writeError(CDKTavernaException.READ_FILE_ERROR + arffFile,
                    this.getActivityName(), e);
        }
    }
    // Set output
    this.setOutputAsObjectList(datasets, this.OUTPUT_PORTS[0]);
}

From source file:org.opentox.jaqpot3.qsar.trainer.MlrRegression.java

License:Open Source License

/**
 * Trains a Multiple Linear Regression (MLR) model on the given instances.
 *
 * Steps: validate the prediction (class) feature, reorder attributes so the
 * target comes last, build the model metadata, publish the predicted
 * feature, train a Weka LinearRegression and attach it (with its evaluation
 * summary) to the returned model.
 *
 * @param data the training instances
 * @return the trained model, ready to be persisted
 * @throws JaqpotException if validation, task updating, training or
 *                         serialization fails
 */
@Override
public Model train(Instances data) throws JaqpotException {
    try {

        getTask().getMeta().addComment(
                "Dataset successfully retrieved and converted " + "into a weka.core.Instances object");
        syncTaskState();

        Instances trainingSet = data;
        getTask().getMeta().addComment("The downloaded dataset is now preprocessed");
        syncTaskState();

        /* SET CLASS ATTRIBUTE */
        Attribute target = trainingSet.attribute(targetUri.toString());
        if (target == null) {
            throw new BadParameterException("The prediction feature you provided was not found in the dataset");
        } else {
            if (!target.isNumeric()) {
                throw new QSARException("The prediction feature you provided is not numeric.");
            }
        }
        trainingSet.setClass(target);
        /* Very important: place the target feature at the end! (target = last)*/
        int numAttributes = trainingSet.numAttributes();
        int classIndex = trainingSet.classIndex();
        Instances orderedTrainingSet = null;
        List<String> properOrder = new ArrayList<String>(numAttributes);
        for (int j = 0; j < numAttributes; j++) {
            if (j != classIndex) {
                properOrder.add(trainingSet.attribute(j).name());
            }
        }
        properOrder.add(trainingSet.attribute(classIndex).name());
        try {
            orderedTrainingSet = InstancesUtil.sortByFeatureAttrList(properOrder, trainingSet, -1);
        } catch (JaqpotException ex) {
            logger.error("Improper dataset - training will stop", ex);
            throw ex;
        }
        orderedTrainingSet.setClass(orderedTrainingSet.attribute(targetUri.toString()));

        /* START CONSTRUCTION OF MODEL */
        Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));
        m.setAlgorithm(getAlgorithm());
        m.setCreatedBy(getTask().getCreatedBy());
        m.setDataset(datasetUri);
        m.addDependentFeatures(dependentFeature);
        try {
            dependentFeature.loadFromRemote();
        } catch (ServiceInvocationException ex) {
            Logger.getLogger(MlrRegression.class.getName()).log(Level.SEVERE, null, ex);
        }

        // Prefer a human-readable title for the dependent feature; fall back
        // to its URI when no title is available.
        Set<LiteralValue> depFeatTitles = null;
        if (dependentFeature.getMeta() != null) {
            depFeatTitles = dependentFeature.getMeta().getTitles();
        }

        String depFeatTitle = dependentFeature.getUri().toString();
        if (depFeatTitles != null) {
            depFeatTitle = depFeatTitles.iterator().next().getValueAsString();
            m.getMeta().addTitle("MLR model for " + depFeatTitle)
                    .addDescription("MLR model for the prediction of " + depFeatTitle + " (uri: "
                            + dependentFeature.getUri() + " ).");
        } else {
            m.getMeta().addTitle("MLR model for the prediction of the feature with URI " + depFeatTitle)
                    .addComment("No name was found for the feature " + depFeatTitle);
        }

        /*
         * COMPILE THE LIST OF INDEPENDENT FEATURES with the exact order in which
         * these appear in the Instances object (training set).
         */
        m.setIndependentFeatures(independentFeatures);

        /* CREATE PREDICTED FEATURE AND POST IT TO REMOTE SERVER */
        String predictionFeatureUri = null;
        Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
                "Predicted " + depFeatTitle + " by MLR model", datasetUri, featureService);
        m.addPredictedFeatures(predictedFeature);
        predictionFeatureUri = predictedFeature.getUri().toString();

        getTask().getMeta().addComment("Prediction feature " + predictionFeatureUri + " was created.");
        syncTaskState();

        /* ACTUAL TRAINING OF THE MODEL USING WEKA */
        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };

        try {
            linreg.setOptions(linRegOptions);
            linreg.buildClassifier(orderedTrainingSet);

        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        }

        try {
            // evaluate classifier and print some statistics
            Evaluation eval = new Evaluation(orderedTrainingSet);
            eval.evaluateModel(linreg, orderedTrainingSet);
            String stats = eval.toSummaryString("\nResults\n======\n", false);

            ActualModel am = new ActualModel(linreg);
            am.setStatistics(stats);
            m.setActualModel(am);
        } catch (NotSerializableException ex) {
            String message = "Model is not serializable";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        }

        m.getMeta().addPublisher("OpenTox").addComment("This is a Multiple Linear Regression Model");

        //save the instances being predicted to abstract trainer for calculating DoA
        predictedInstances = orderedTrainingSet;
        excludeAttributesDoA.add(dependentFeature.getUri().toString());

        return m;
    } catch (QSARException ex) {
        String message = "QSAR Exception: cannot train MLR model";
        logger.error(message, ex);
        throw new JaqpotException(message, ex);
    }
}

/**
 * Flushes the current task's meta and status to the database, translating
 * any DbException (from update or close) into a JaqpotException. Extracted
 * because the original method repeated this 15-line block three times.
 */
private void syncTaskState() throws JaqpotException {
    UpdateTask updater = new UpdateTask(getTask());
    updater.setUpdateMeta(true);
    updater.setUpdateTaskStatus(true);//TODO: Is this necessary?
    try {
        updater.update();
    } catch (DbException ex) {
        throw new JaqpotException(ex);
    } finally {
        try {
            updater.close();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        }
    }
}