Example usage for weka.attributeSelection ChiSquaredAttributeEval ChiSquaredAttributeEval

List of usage examples for weka.attributeSelection ChiSquaredAttributeEval ChiSquaredAttributeEval

Introduction

On this page you can find example usages of the weka.attributeSelection.ChiSquaredAttributeEval constructor, ChiSquaredAttributeEval().

Prototype

public ChiSquaredAttributeEval() 

Source Link

Document

Constructor

Usage

From source file:FeatureSelectionClass.java

/**
 * Ranks the attributes of the dataset found at {@code path} with a
 * {@link ChiSquaredAttributeEval} evaluator and a {@link Ranker} search.
 * <p>
 * The ranker is asked to keep {@code min(500, numAttributes - 1)} attributes.
 *
 * @param path location handed to {@code PreparingSteps.getReadFileData}
 * @return the attribute selection after {@code SelectAttributes} has been run
 * @throws Exception if loading the data or the attribute selection fails
 */
public AttributeSelection withChiSquare(String path) throws Exception {
    PreparingSteps pr = new PreparingSteps();
    // Load the dataset once and reuse it; the attribute count comes from the same instance.
    Instances data = pr.getReadFileData(path);
    int attributeCount = data.numAttributes();

    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, attributeCount - 1));

    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(new ChiSquaredAttributeEval());
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}

From source file:ca.uottawa.balie.WekaAttributeSelection.java

License:Open Source License

/**
 * Select the top attributes/* w w  w  .j av  a  2s  .  com*/
 */
public void Select(boolean pi_Debug) {
    Instances insts = m_DummyLearner.GetTrainInstances();

    try {
        ASEvaluation eval = null;
        ASSearch search = null;

        if (m_Evaluator == WEKA_CHI_SQUARE) {
            eval = new ChiSquaredAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_INFO_GAIN) {
            eval = new InfoGainAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_WRAPPER) {
            eval = new ClassifierSubsetEval();
            ((ClassifierSubsetEval) eval).setClassifier(new NaiveBayes());
            search = new Ranker(); // TODO: use something else than ranker
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_SYM_UNCERT) {
            eval = new SymmetricalUncertAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_SVM) {
            eval = new SVMAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_RELIEF) {
            eval = new ReliefFAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_ONER) {
            eval = new OneRAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        }

        m_AttributeSelection = new AttributeSelection();
        m_AttributeSelection.setEvaluator(eval);
        m_AttributeSelection.setSearch(search);

        m_AttributeSelection.SelectAttributes(insts);
        if (pi_Debug)
            System.out.println(m_AttributeSelection.toResultsString());

    } catch (Exception e) {
        System.err.println(e.getMessage());
    }

}

From source file:it.poliba.sisinflab.simlib.featureSelection.methods.CHI.java

/**
 * Ranks every attribute of a dataset with the chi-squared evaluator and writes the
 * ranking, the selected attribute indices and the textual summary to
 * {@code data/CHIResults.txt}.
 * <p>
 * Progress messages go to {@code System.out}; the results are written straight to the
 * file so that {@code System.out} is not redirected for the rest of the program.
 * An empty dataset name (or any other {@link IllegalArgumentException}) prints
 * {@code "Error"} to {@code System.err}; other failures print their stack trace.
 *
 * @param dataset location of the dataset, handed to {@code DataSource}
 */
public void execute(String dataset) {
    try {

        if (dataset.length() == 0)
            throw new IllegalArgumentException();
        // Load input dataset.
        DataSource source = new DataSource(dataset);
        System.out.println("Reading instances...");
        Instances data = source.getDataSet();

        // Scores each attribute with the chi-squared statistic.
        ChiSquaredAttributeEval chiEvaluator = new ChiSquaredAttributeEval();

        // Ranking the attributes; -1 asks the ranker to retain all of them.
        Ranker ranker = new Ranker();
        ranker.setNumToSelect(-1);

        AttributeSelection selector = new AttributeSelection();
        System.out.println("Selecting attributes...");
        selector.setSearch(ranker);
        selector.setEvaluator(chiEvaluator);
        selector.SelectAttributes(data);

        PrintStream o = new PrintStream(new File("data/" + "CHIResults" + ".txt"));
        try {
            // rankedAttributes() is a two-dimensional array, so deepToString is needed
            // to print its contents rather than the row references.
            o.println(Arrays.deepToString(selector.rankedAttributes()));
            o.println(Arrays.toString(selector.selectedAttributes()));
            o.println(selector.toResultsString());

            o.println();
        } finally {
            // Always release the file handle, even when printing fails.
            o.close();
        }

    } catch (IllegalArgumentException e) {
        System.err.println("Error");
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:mulan.examples.ChiSquareReduction.java

License:Open Source License

/**
 * Executes this example.
 * <p>
 * Ranks the features of a multi-label dataset with a binary-relevance chi-squared
 * evaluator. With {@code -numattribs 0} (or when the option is omitted) the score of
 * every feature is printed; otherwise the dataset reduced to the requested number of
 * top-ranked features plus all labels is printed.
 *
 * @param args
 *            command-line arguments -path, -filestem and -numattribs, e.g. -path
 *            datasets/ -filestem emotions -numattribs 10
 * @throws Exception
 *             if the dataset cannot be loaded, the arguments are invalid or the
 *             evaluation fails
 */
public static void main(String[] args) throws Exception {
    String path = Utils.getOption("path", args);
    String filestem = Utils.getOption("filestem", args);
    String attributesToKeep = Utils.getOption("numattribs", args);
    // Utils.getOption yields an empty string for a missing option; treat that as 0
    // ("only print the scores") instead of failing inside Integer.parseInt.
    final int numToKeep = attributesToKeep.length() == 0 ? 0 : Integer.parseInt(attributesToKeep);

    MultiLabelInstances mlData = new MultiLabelInstances(path + filestem + ".arff", path + filestem + ".xml");

    ASEvaluation ase = new ChiSquaredAttributeEval();
    BinaryRelevanceAttributeEvaluator ae = new BinaryRelevanceAttributeEvaluator(ase, mlData, "max", "none",
            "eval");

    Ranker r = new Ranker();
    int[] result = r.search(ae, mlData);

    System.out.println(mlData.getDataSet().numAttributes());

    if (numToKeep == 0) {
        int[] featureIndices = mlData.getFeatureIndices();
        for (int i = 0; i < featureIndices.length; i++) {
            System.out.println("Attribute " + mlData.getDataSet().attribute(featureIndices[i]).name() + " : "
                    + ae.evaluateAttribute(mlData.getDataSet().attribute(featureIndices[i]).index()));
        }
        return;
    }

    // Reject counts that the ranking cannot satisfy before copying out of it.
    if (numToKeep < 0 || numToKeep > result.length) {
        throw new IllegalArgumentException(
                "numattribs must be between 0 and " + result.length + " but was " + numToKeep);
    }

    // Keep the top-ranked features followed by every label attribute.
    int numLabels = mlData.getNumLabels();
    int[] toKeep = new int[numToKeep + numLabels];
    System.arraycopy(result, 0, toKeep, 0, numToKeep);
    System.arraycopy(mlData.getLabelIndices(), 0, toKeep, numToKeep, numLabels);

    // With the selection inverted, Remove keeps exactly the listed attributes.
    Remove filterRemove = new Remove();
    filterRemove.setAttributeIndicesArray(toKeep);
    filterRemove.setInvertSelection(true);
    filterRemove.setInputFormat(mlData.getDataSet());
    Instances filtered = Filter.useFilter(mlData.getDataSet(), filterRemove);
    MultiLabelInstances mlFiltered = new MultiLabelInstances(filtered, mlData.getLabelsMetaData());

    System.out.println(mlFiltered.getDataSet());
}

From source file:old.CFS.java

/**
 * uses the meta-classifier/*from   w ww  .  java 2s.  co  m*/
 */
protected static void useClassifier(Instances data) throws Exception {
    System.out.println("\n1. Meta-classfier");
    AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308);
    search.setNumToSelect(1000);
    J48 base = new J48();
    classifier.setClassifier(base);
    classifier.setEvaluator(eval);
    classifier.setSearch(search);
    Evaluation evaluation = new Evaluation(data);
    evaluation.crossValidateModel(classifier, data, 10, new Random(1));
    System.out.println(evaluation.toSummaryString());
}

From source file:old.CFS.java

/**
 * Uses the filter: runs the supervised attribute-selection filter, configured with
 * the chi-squared evaluator and a ranker, over the data and prints the filtered
 * dataset.
 *
 * @param data the dataset to filter
 * @throws Exception if the filter cannot be configured or applied
 */
protected static void useFilter(Instances data) throws Exception {
    System.out.println("\n2. Filter");

    Ranker ranker = new Ranker();
    // Same value as the literal -1.7976931348623157E308.
    ranker.setThreshold(-Double.MAX_VALUE);
    ranker.setNumToSelect(1000);

    weka.filters.supervised.attribute.AttributeSelection selectionFilter =
            new weka.filters.supervised.attribute.AttributeSelection();
    selectionFilter.setEvaluator(new ChiSquaredAttributeEval());
    selectionFilter.setSearch(ranker);
    selectionFilter.setInputFormat(data);

    Instances reduced = Filter.useFilter(data, selectionFilter);
    System.out.println(reduced);
}

From source file:old.CFS.java

/**
 * uses the low level approach/*  w  w  w. j  a  va  2  s .  com*/
   * @param data
 */
protected static void useLowLevel(Instances data) throws Exception {
    System.out.println("\n3. Low-level");
    AttributeSelection attsel = new AttributeSelection();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308);
    search.setNumToSelect(1000);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    attsel.setFolds(10);
    attsel.setXval(true);
    attsel.SelectAttributes(data);
    //    System.out.println(data.toSummaryString());
    //    attsel.selectAttributesCVSplit(data);
    //    attsel.SelectAttributes(data);

    System.out.println(attsel.CrossValidateAttributes());
    //    attsel.SelectAttributes(data);
    //    attsel.selectAttributesCVSplit(data);
    Instances newData = attsel.reduceDimensionality(data);

    int[] indices = attsel.selectedAttributes();
    System.out.println(newData);
    System.out.println("selected attribute indices (starting with 0):\n" + Utils.arrayToString(indices));
}

From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java

License:Apache License

/**
 * Constructor to instantiate a new FeatureEvaluator object.
 * <p>
 * The rows stored under the {@code "unprocessed_data"} key of the JSON document are
 * written to {@code FeaturesEvaluationDataSet.csv} under {@code BASE_DIR} and loaded
 * back with {@code CSVLoader}. Every non-class attribute is then scored by five
 * evaluators (information gain, gain ratio, chi-squared, symmetrical uncertainty and
 * significance). Each evaluator's scores are min-max scaled, the scaled scores are
 * summed per attribute, and the attributes are stored in descending order of that sum
 * in {@code featureTitles}, {@code featureScores}, {@code featureWeights} and
 * {@code featurePriorities}.
 *
 * @param json the data string
 * @param data the data set; the passed value is never read because it is replaced by
 *             the instances loaded from the generated CSV file
 * @throws Exception the exception
 */
public FeatureEvaluator(String json, Instances data) throws Exception {

    this.featureTitles = new ArrayList<String>();
    this.featureScores = new ArrayList<Double>();
    this.featureWeights = new ArrayList<Double>();
    this.featurePriorities = new ArrayList<Double>();

    OrderedJSONObject jsonObject = new OrderedJSONObject(json);
    JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data");
    StringBuilder csvBuilder = new StringBuilder();
    for (int i = 0; i < jsontokenArray.length(); i++) {
        String row = jsontokenArray.get(i).toString();
        // Drop the first and last character of each row
        // (presumably the enclosing brackets of a JSON array - TODO confirm).
        csvBuilder.append(row.substring(1, row.length() - 1)).append("\n");
    }
    String csvString = csvBuilder.toString();

    String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv";
    File file = new File(filePath);
    // if file does not exists, then create it
    if (!file.exists()) {
        file.createNewFile();
    }

    FileUtils.writeStringToFile(file, csvString);

    CSVLoader loader = new CSVLoader();
    loader.setSource(file);
    data = loader.getDataSet();

    // The last column is taken as the class attribute when none is set.
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    int numUnlabeledAttributes = data.numAttributes() - 1;

    // Each flag and each value must be its own array element; packed into a single
    // string they are not recognised as options at all.
    // -T: score threshold (lowest possible, nothing discarded), -N -1: keep all attributes.
    String[] options = { "-T", "-1.7976931348623157E308", "-N", "-1" };
    Ranker atrank = new Ranker();
    atrank.setOptions(options);

    weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection();
    atsel.setSearch(atrank);

    //  Information Gain Attribute Evaluator
    InfoGainAttributeEval infoGainAttrEval = new InfoGainAttributeEval();
    atsel.setEvaluator(infoGainAttrEval);
    atsel.SelectAttributes(data);
    double[] infoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        infoGainRanks[i] = roundToFourDecimals(infoGainAttrEval.evaluateAttribute(i));
    }
    double[] scaledInfoGainRanks = scaleToUnitRange(infoGainRanks);

    //  Gain Ratio Attribute Evaluator
    GainRatioAttributeEval gainRatioAttrEval = new GainRatioAttributeEval();
    atsel.setEvaluator(gainRatioAttrEval);
    atsel.SelectAttributes(data);
    double[] gainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        gainRatioRanks[i] = roundToFourDecimals(gainRatioAttrEval.evaluateAttribute(i));
    }
    double[] scaledGainRatioRanks = scaleToUnitRange(gainRatioRanks);

    //  Chi Squared Attribute Evaluator
    ChiSquaredAttributeEval chiSquaredAttrEval = new ChiSquaredAttributeEval();
    atsel.setEvaluator(chiSquaredAttrEval);
    atsel.SelectAttributes(data);
    double[] chiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        chiSquaredRanks[i] = roundToFourDecimals(chiSquaredAttrEval.evaluateAttribute(i));
    }
    double[] scaledChiSquaredRanks = scaleToUnitRange(chiSquaredRanks);

    //  Symmetrical Uncert Attribute Evaluator
    SymmetricalUncertAttributeEval symmetricalUncertAttrEval = new SymmetricalUncertAttributeEval();
    atsel.setEvaluator(symmetricalUncertAttrEval);
    atsel.SelectAttributes(data);
    double[] symmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        symmetricalUncertRanks[i] = roundToFourDecimals(symmetricalUncertAttrEval.evaluateAttribute(i));
    }
    double[] scaledSymmetricalUncertRanks = scaleToUnitRange(symmetricalUncertRanks);

    //  Significance Attribute Evaluator
    SignificanceAttributeEval significanceAttrEval = new SignificanceAttributeEval();
    atsel.setEvaluator(significanceAttrEval);
    atsel.SelectAttributes(data);
    double[] significanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        significanceRanks[i] = roundToFourDecimals(significanceAttrEval.evaluateAttribute(i));
    }
    double[] scaledSignificanceRanks = scaleToUnitRange(significanceRanks);

    // Sum the five scaled scores per attribute.
    double[] combinedRanks = new double[numUnlabeledAttributes];
    double combinedranksSum = 0;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        double attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i]
                + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i];
        combinedRanks[i] = roundToFourDecimals(attributeSum);
        combinedranksSum = combinedranksSum + combinedRanks[i];
    }

    // Order the attribute indices by descending combined rank. Sorting indices (with a
    // stable insertion sort) keeps every attribute paired with its own rank, so
    // attributes with equal ranks are each reported once, in their original order.
    int[] order = new int[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        order[i] = i;
    }
    for (int i = 1; i < numUnlabeledAttributes; i++) {
        int current = order[i];
        int j = i - 1;
        while (j >= 0 && combinedRanks[order[j]] < combinedRanks[current]) {
            order[j + 1] = order[j];
            j--;
        }
        order[j + 1] = current;
    }

    for (int position = 0; position < numUnlabeledAttributes; position++) {
        int attributeIndex = order[position];
        double combinedRank = combinedRanks[attributeIndex];

        // The title is the second whitespace-separated token of the attribute's string form.
        String title = data.attribute(attributeIndex).toString().split("\\s+")[1];

        // NOTE(review): the divisor 9 is kept from the original although only five
        // scaled scores are summed here - confirm the intended normalisation.
        double score = Math.round(10000 * (combinedRank / 9)) / 100d;
        double weight = Math.round(10000 * (combinedRank / combinedranksSum)) / 100d;
        double priority = Math.round(score * weight) / 100d;

        this.featureTitles.add(title);
        this.featureScores.add(score);
        this.featureWeights.add(weight);
        this.featurePriorities.add(priority);

        System.out.println(title + " is " + score + " % Important");
    }
}

/** Rounds a value to four decimal places. */
private static double roundToFourDecimals(double value) {
    return Math.round(10000 * value) / 10000d;
}

/**
 * Min-max scales the given ranks using the bounds reported by
 * {@code computerMinMaxValues}, rounding each result to four decimal places. When
 * all ranks are equal the division yields NaN, which {@code Math.round} maps to 0.
 */
private double[] scaleToUnitRange(double[] ranks) throws Exception {
    double[] minmaxValues = computerMinMaxValues(ranks);
    double min = minmaxValues[0];
    double max = minmaxValues[1];
    double[] scaled = new double[ranks.length];
    for (int i = 0; i < ranks.length; i++) {
        scaled[i] = roundToFourDecimals((ranks[i] - min) / (max - min));
    }
    return scaled;
}

From source file:preprocess.FeatureSelector.java

License:Open Source License

/**
 * Select features from dataset by BR method.
 * <p>
 * Ranks the features of {@code dataset} with a binary-relevance chi-squared
 * evaluator and keeps the {@code nFeatures} top-ranked features together with all
 * label attributes.
 *
 * @param combination Combination type: "max", "min" or "avg"
 * @param normalization Normalization type: "dl", "dm" or "none"
 * @param output Output type: "eval" or "rank"
 * @return Feature-selected dataset, or {@code null} when any argument is not one of
 *         the accepted values or the selection fails (the failure is logged)
 */
public MultiLabelInstances select(String combination, String normalization, String output) {

    // Each argument must be valid on its own; constant-first equals also rejects null.
    boolean validCombination = "max".equals(combination) || "min".equals(combination)
            || "avg".equals(combination);
    boolean validNormalization = "dl".equals(normalization) || "dm".equals(normalization)
            || "none".equals(normalization);
    boolean validOutput = "eval".equals(output) || "rank".equals(output);
    if (!validCombination || !validNormalization || !validOutput) {
        return null;
    }

    MultiLabelInstances modifiedDataset = null;

    try {
        ASEvaluation ase = new ChiSquaredAttributeEval();
        BinaryRelevanceAttributeEvaluator ae = new BinaryRelevanceAttributeEvaluator(ase, dataset, combination,
                normalization, output);

        Ranker r = new Ranker();
        int[] result = r.search(ae, dataset);

        // Keep the top-ranked features followed by every label attribute.
        int[] toKeep = new int[nFeatures + dataset.getNumLabels()];
        System.arraycopy(result, 0, toKeep, 0, nFeatures);
        int[] labelIndices = dataset.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, nFeatures, dataset.getNumLabels());

        // With the selection inverted, Remove keeps exactly the listed attributes.
        Remove filterRemove = new Remove();
        filterRemove.setAttributeIndicesArray(toKeep);
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(dataset.getDataSet());

        modifiedDataset = new MultiLabelInstances(Filter.useFilter(dataset.getDataSet(), filterRemove),
                dataset.getLabelsMetaData());

    } catch (Exception ex) {
        Logger.getLogger(FeatureSelector.class.getName()).log(Level.SEVERE, null, ex);
    }

    return modifiedDataset;
}

From source file:sirius.misc.featurevisualizer.FeatureVisualizerPane.java

License:Open Source License

/**
 * Loads an ARFF file on a minimum-priority background thread and refreshes the pane:
 * the instances (last attribute as class), the sequence-name table (read from the
 * sibling {@code .sequencesName} file), the chi-squared evaluator, the axis and class
 * combo boxes, the counters and the status field. Failures are shown in an error
 * dialog and their stack trace is printed.
 * <p>
 * NOTE(review): the UI fields are updated from the worker thread rather than the
 * event dispatch thread - confirm this is acceptable for these components.
 *
 * @param file the ARFF file to load
 * @throws Exception declared by the signature; errors raised while loading are
 *                   handled inside the worker thread
 */
private void loadArffFile(final File file) throws Exception {
    Thread runThread = new Thread() {
        @Override
        public void run() {
            try {
                // Close the reader once the instances have been parsed so the file
                // handle is not leaked.
                BufferedReader reader = new BufferedReader(new FileReader(file.getAbsolutePath()));
                try {
                    instances = new Instances(reader);
                } finally {
                    reader.close();
                }
                String sequenceNameFile = file.getAbsolutePath().substring(0,
                        file.getAbsolutePath().indexOf(".arff")) + ".sequencesName";
                featureGraphPane.setInstances(instances);
                sequenceNameTableModel.loadSequencesNameFile(sequenceNameFile);
                statusTextField.setText("Loading..");
                instances.setClassIndex(instances.numAttributes() - 1);
                myChiSquare = new ChiSquaredAttributeEval();
                myChiSquare.buildEvaluator(instances);
                yAxisComboBox.removeAllItems();
                xAxisComboBox.removeAllItems();
                yAxisComboBox.addItem("       ");
                xAxisComboBox.addItem("       ");
                updateComboBox(yAxisComboBox);
                updateComboBox(xAxisComboBox);
                updateComboBox(classComboBox);
                classComboBox.setSelectedIndex(classComboBox.getItemCount() - 1);
                classComboBox.repaint();
                sequencesNumTextField.setText("" + instances.numInstances());
                featuresNumTextField.setText("" + instances.numAttributes());
                topChiSquareButton.setEnabled(true);
                topCFSButton.setEnabled(true);
                top1CFSIndex = -1;
                top2CFSIndex = -1;
                statusTextField.setText("Done");
            } catch (Exception e) {
                JOptionPane.showMessageDialog(null, e.getMessage(), "Error", JOptionPane.ERROR_MESSAGE);
                e.printStackTrace();
            }
        }
    };
    runThread.setPriority(Thread.MIN_PRIORITY);
    runThread.start();
}