// Java tutorial
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    Ranker.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.Enumeration;
import java.util.Vector;

import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 * <!-- globalinfo-start --> Ranker : <br/>
 * <br/>
 * Ranks attributes by their individual evaluations. Use in conjunction with
 * attribute evaluators (ReliefF, GainRatio, Entropy etc).<br/>
 * <p/>
 * <!-- globalinfo-end -->
 *
 * <!-- options-start --> Valid options are:
 * <p/>
 *
 * <pre>
 * -P &lt;start set&gt;
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.
 *  Any starting attributes specified are
 *  ignored during the ranking.
 * </pre>
 *
 * <pre>
 * -T &lt;threshold&gt;
 *  Specify a theshold by which attributes
 *  may be discarded from the ranking.
 * </pre>
 *
 * <pre>
 * -N &lt;num to select&gt;
 *  Specify number of attributes to select
 * </pre>
 *
 * <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class Ranker extends ASSearch
  implements RankedOutputSearch, StartSetHandler, OptionHandler {

  /** for serialization */
  static final long serialVersionUID = -9086714848510751934L;

  /** Holds the starting set as an array of (0-based) attribute indexes */
  private int[] m_starting;

  /** Holds the start set for the search as a range */
  private Range m_startRange;

  /** Holds the list of evaluated attributes, in evaluation order */
  private int[] m_attributeList;

  /** Holds the merit scores, parallel to m_attributeList */
  private double[] m_attributeMerit;

  /** Data has class attribute---if unsupervised evaluator then no class */
  private boolean m_hasClass;

  /** Class index of the data if supervised evaluator */
  private int m_classIndex;

  /** The number of attributes */
  private int m_numAttribs;

  /**
   * A threshold by which to discard attributes---used by the
   * AttributeSelection module
   */
  private double m_threshold;

  /**
   * The number of attributes to select. -1 indicates that all attributes are
   * to be retained. Has precedence over m_threshold
   */
  private int m_numToSelect = -1;

  /** Used to compute the number to select */
  private int m_calculatedNumToSelect = -1;

  /**
   * Returns a string describing this search method
   *
   * @return a description of the search suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    return "Ranker : \n\nRanks attributes by their individual evaluations. "
      + "Use in conjunction with attribute evaluators (ReliefF, GainRatio, "
      + "Entropy etc).\n";
  }

  /**
   * Constructor
   */
  public Ranker() {
    resetOptions();
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String numToSelectTipText() {
    return "Specify the number of attributes to retain. The default value "
      + "(-1) indicates that all attributes are to be retained. Use either "
      + "this option or a threshold to reduce the attribute set.";
  }

  /**
   * Specify the number of attributes to select from the ranked list. -1
   * indicates that all attributes are to be retained.
   *
   * @param n the number of attributes to retain
   */
  @Override
  public void setNumToSelect(int n) {
    m_numToSelect = n;
  }

  /**
   * Gets the number of attributes to be retained.
   *
   * @return the number of attributes to retain
   */
  @Override
  public int getNumToSelect() {
    return m_numToSelect;
  }

  /**
   * Gets the calculated number to select. When a non-negative number to
   * select has been set, that number is returned, capped at the number of
   * evaluated attributes. Otherwise the value last computed by
   * {@link #rankedAttributes()} (from the threshold, or the full ranking
   * length) is returned.
   *
   * @return the calculated number of attributes to select
   */
  @Override
  public int getCalculatedNumToSelect() {
    if (m_numToSelect >= 0) {
      if (m_attributeMerit == null) {
        // No search has been run (or options were reset), so there is no
        // ranking to cap against; report the requested number as-is rather
        // than failing with a NullPointerException.
        m_calculatedNumToSelect = m_numToSelect;
      } else {
        m_calculatedNumToSelect =
          Math.min(m_numToSelect, m_attributeMerit.length);
      }
    }
    return m_calculatedNumToSelect;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String thresholdTipText() {
    return "Set threshold by which attributes can be discarded. Default value "
      + "results in no attributes being discarded. Use either this option or "
      + "numToSelect to reduce the attribute set.";
  }

  /**
   * Set the threshold by which the AttributeSelection module can discard
   * attributes.
   *
   * @param threshold the threshold.
   */
  @Override
  public void setThreshold(double threshold) {
    m_threshold = threshold;
  }

  /**
   * Returns the threshold so that the AttributeSelection module can discard
   * attributes from the ranking.
   *
   * @return the current threshold
   */
  @Override
  public double getThreshold() {
    return m_threshold;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String generateRankingTipText() {
    return "A constant option. Ranker is only capable of generating "
      + " attribute rankings.";
  }

  /**
   * This is a dummy set method---Ranker is ONLY capable of producing a ranked
   * list of attributes for attribute evaluators.
   *
   * @param doRank this parameter is N/A and is ignored
   */
  @Override
  public void setGenerateRanking(boolean doRank) {
  }

  /**
   * This is a dummy method. Ranker can ONLY be used with attribute evaluators
   * and as such can only produce a ranked list of attributes
   *
   * @return true all the time.
   */
  @Override
  public boolean getGenerateRanking() {
    return true;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String startSetTipText() {
    return "Specify a set of attributes to ignore. "
      + " When generating the ranking, Ranker will not evaluate the attributes "
      + " in this list. " + "This is specified as a comma "
      + "seperated list off attribute indexes starting at 1. It can include "
      + "ranges. Eg. 1,2,5-9,17.";
  }

  /**
   * Sets a starting set of attributes for the search. It is the search
   * method's responsibility to report this start set (if any) in its
   * toString() method.
   *
   * @param startSet a string containing a list of attributes (and or ranges),
   *          eg. 1,2,6,10-15.
   * @throws Exception if start set can't be set.
   */
  @Override
  public void setStartSet(String startSet) throws Exception {
    m_startRange.setRanges(startSet);
  }

  /**
   * Returns a list of attributes (and or attribute ranges) as a String
   *
   * @return a list of attributes (and or attribute ranges)
   */
  @Override
  public String getStartSet() {
    return m_startRange.getRanges();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   **/
  @Override
  public Enumeration<Option> listOptions() {
    Vector<Option> newVector = new Vector<Option>(3);

    newVector.addElement(new Option(
      "\tSpecify a starting set of attributes.\n" + "\tEg. 1,3,5-7.\n"
        + "\tAny starting attributes specified are\n"
        + "\tignored during the ranking.",
      "P", 1, "-P <start set>"));

    newVector.addElement(
      new Option("\tSpecify a theshold by which attributes\n"
        + "\tmay be discarded from the ranking.", "T", 1, "-T <threshold>"));

    newVector.addElement(new Option("\tSpecify number of attributes to select",
      "N", 1, "-N <num to select>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options.
   * <p/>
   *
   * <!-- options-start --> Valid options are:
   * <p/>
   *
   * <pre>
   * -P &lt;start set&gt;
   *  Specify a starting set of attributes.
   *  Eg. 1,3,5-7.
   *  Any starting attributes specified are
   *  ignored during the ranking.
   * </pre>
   *
   * <pre>
   * -T &lt;threshold&gt;
   *  Specify a theshold by which attributes
   *  may be discarded from the ranking.
   * </pre>
   *
   * <pre>
   * -N &lt;num to select&gt;
   *  Specify number of attributes to select
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    resetOptions();

    String startSet = Utils.getOption('P', options);
    if (startSet.length() != 0) {
      setStartSet(startSet);
    }

    String threshold = Utils.getOption('T', options);
    if (threshold.length() != 0) {
      setThreshold(Double.parseDouble(threshold));
    }

    String numToSelect = Utils.getOption('N', options);
    if (numToSelect.length() != 0) {
      setNumToSelect(Integer.parseInt(numToSelect));
    }
  }

  /**
   * Gets the current settings of Ranker.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  @Override
  public String[] getOptions() {
    Vector<String> options = new Vector<String>();

    if (!(getStartSet().equals(""))) {
      options.add("-P");
      options.add("" + startSetToString());
    }

    options.add("-T");
    options.add("" + getThreshold());

    options.add("-N");
    options.add("" + getNumToSelect());

    return options.toArray(new String[0]);
  }

  /**
   * Converts the array of starting attributes to a string. This is used by
   * getOptions to return the actual attributes specified as the starting set.
   * This is better than using m_startRange.getRanges() as the same start set
   * can be specified in different ways from the command line---eg 1,2,3 ==
   * 1-3. This is to ensure that stuff that is stored in a database is
   * comparable.
   * <p/>
   * The class attribute (when the data has one) is left out of the list. If no
   * search has been run yet the raw range string is returned instead.
   *
   * @return a comma separated list of individual (1-based) attribute numbers
   *         as a String
   */
  private String startSetToString() {
    if (m_starting == null) {
      return getStartSet();
    }

    StringBuilder text = new StringBuilder();
    for (int attribute : m_starting) {
      // Compare the attribute index itself (not its position in the array)
      // against the class index.
      if (m_hasClass && attribute == m_classIndex) {
        continue;
      }
      // Separator goes before every entry but the first, so skipping the
      // last entry can never leave a dangling comma.
      if (text.length() > 0) {
        text.append(',');
      }
      text.append(attribute + 1);
    }
    return text.toString();
  }

  /**
   * Kind of a dummy search algorithm. Calls a Attribute evaluator to evaluate
   * each attribute not included in the startSet and then sorts them to produce
   * a ranked list of attributes.
   *
   * @param ASEval the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array of the evaluated attribute indexes, in the order produced
   *         by {@link #rankedAttributes()}
   * @throws Exception if the search can't be completed
   */
  @Override
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    if (!(ASEval instanceof AttributeEvaluator)) {
      throw new Exception(ASEval.getClass().getName()
        + " is not an Attribute evaluator!");
    }

    m_numAttribs = data.numAttributes();

    if (ASEval instanceof UnsupervisedAttributeEvaluator) {
      m_hasClass = false;
    } else {
      m_classIndex = data.classIndex();
      m_hasClass = m_classIndex >= 0;
    }

    // get the transformed data and check to see if the transformer
    // preserves a class index
    if (ASEval instanceof AttributeTransformer) {
      data = ((AttributeTransformer) ASEval).transformedHeader();
      if (m_classIndex >= 0 && data.classIndex() >= 0) {
        m_classIndex = data.classIndex();
        m_hasClass = true;
      }
    }

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    } else {
      // Drop any selection left over from an earlier search so that a
      // cleared start set really means "ignore nothing".
      m_starting = null;
    }

    // Count the attributes that will be left out: the start set, plus the
    // class attribute unless the start set already lists it.
    int omitted = (m_starting != null) ? m_starting.length : 0;
    if (m_hasClass) {
      boolean classListed = false;
      if (m_starting != null) {
        for (int attribute : m_starting) {
          if (attribute == m_classIndex) {
            classListed = true;
            break;
          }
        }
      }
      if (!classListed) {
        omitted++;
      }
    }

    m_attributeList = new int[m_numAttribs - omitted];
    m_attributeMerit = new double[m_numAttribs - omitted];

    // add in those attributes not in the starting (omit list)
    int next = 0;
    for (int attribute = 0; attribute < m_numAttribs; attribute++) {
      if (!inStarting(attribute)) {
        m_attributeList[next++] = attribute;
      }
    }

    AttributeEvaluator evaluator = (AttributeEvaluator) ASEval;
    for (int i = 0; i < m_attributeList.length; i++) {
      m_attributeMerit[i] = evaluator.evaluateAttribute(m_attributeList[i]);
    }

    double[][] ranking = rankedAttributes();
    int[] rankedAttributes = new int[m_attributeList.length];
    for (int i = 0; i < m_attributeList.length; i++) {
      rankedAttributes[i] = (int) ranking[i][0];
    }
    return rankedAttributes;
  }

  /**
   * Sorts the evaluated attribute list
   *
   * @return an array of sorted (highest eval to lowest) attribute indexes;
   *         each row holds the attribute index in column 0 and its merit in
   *         column 1
   * @throws Exception if no search has been performed yet
   */
  @Override
  public double[][] rankedAttributes() throws Exception {
    if (m_attributeList == null || m_attributeMerit == null) {
      throw new Exception("Search must be performed before a ranked "
        + "attribute list can be obtained");
    }

    int[] sorted = Utils.sort(m_attributeMerit);

    // Walk the sorted positions back to front (reversing the order) and
    // translate each position into its attribute index and merit.
    double[][] bestToWorst = new double[sorted.length][2];
    for (int rank = 0; rank < sorted.length; rank++) {
      int position = sorted[sorted.length - 1 - rank];
      bestToWorst[rank][0] = m_attributeList[position];
      bestToWorst[rank][1] = m_attributeMerit[position];
    }

    // Asking for more attributes than exist is not an error here:
    // getCalculatedNumToSelect() caps the request at the ranking length.
    if (m_numToSelect <= 0) {
      if (m_threshold == -Double.MAX_VALUE) {
        m_calculatedNumToSelect = bestToWorst.length;
      } else {
        determineNumToSelectFromThreshold(bestToWorst);
      }
    }

    return bestToWorst;
  }

  /**
   * Sets the calculated number to select to the number of ranked attributes
   * whose merit is strictly greater than the threshold.
   *
   * @param ranking rows of (attribute index, merit)
   */
  private void determineNumToSelectFromThreshold(double[][] ranking) {
    int count = 0;
    for (double[] element : ranking) {
      if (element[1] > m_threshold) {
        count++;
      }
    }
    m_calculatedNumToSelect = count;
  }

  /**
   * returns a description of the search as a String
   *
   * @return a description of the search
   */
  @Override
  public String toString() {
    StringBuilder description = new StringBuilder();
    description.append("\tAttribute ranking.\n");

    if (m_starting != null) {
      description.append("\tIgnored attributes: ");
      description.append(startSetToString());
      description.append("\n");
    }

    if (m_threshold != -Double.MAX_VALUE) {
      description.append("\tThreshold for discarding attributes: "
        + Utils.doubleToString(m_threshold, 8, 4) + "\n");
    }

    return description.toString();
  }

  /**
   * Resets stuff to default values
   */
  protected void resetOptions() {
    m_starting = null;
    m_startRange = new Range();
    m_attributeList = null;
    m_attributeMerit = null;
    m_threshold = -Double.MAX_VALUE;
  }

  /**
   * Tests whether an attribute is to be left out of the ranking, either
   * because it is the class attribute or because it is in the start set.
   *
   * @param feat the (0-based) attribute index to test
   * @return true if the attribute must not be evaluated
   */
  private boolean inStarting(int feat) {
    // omit the class from the evaluation
    if (m_hasClass && feat == m_classIndex) {
      return true;
    }

    if (m_starting == null) {
      return false;
    }

    for (int element : m_starting) {
      if (element == feat) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision$");
  }
}