Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * MultiInstanceToPropositional.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */ package weka.filters.unsupervised.attribute; import java.util.ArrayList; import java.util.Enumeration; import java.util.Vector; import weka.core.Attribute; import weka.core.Capabilities; import weka.core.Capabilities.Capability; import weka.core.DenseInstance; import weka.core.Instance; import weka.core.Instances; import weka.core.MultiInstanceCapabilitiesHandler; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RelationalLocator; import weka.core.RevisionUtils; import weka.core.SelectedTag; import weka.core.StringLocator; import weka.core.Tag; import weka.core.Utils; import weka.filters.Filter; import weka.filters.UnsupervisedFilter; /** * <!-- globalinfo-start --> Converts the multi-instance dataset into single * instance dataset so that the Nominalize, Standardize and other type of * filters or transformation can be applied to these data for the further * preprocessing.<br> * Note: the first attribute of the converted dataset is a nominal attribute and * refers to the bagId. * <p> * <!-- globalinfo-end --> * * <!-- options-start --> Valid options are: * </p> * * <pre> * -A <num> * The type of weight setting for each prop. instance: * 0.weight = original single bag weight /Total number of * prop. instance in the corresponding bag; * 1.weight = 1.0; * 2.weight = 1.0/Total number of prop. instance in the * corresponding bag; * 3. weight = Total number of prop. instance / (Total number * of bags * Total number of prop. instance in the * corresponding bag). * (default:0) * </pre> * * <!-- options-end --> * * @author Lin Dong (ld21@cs.waikato.ac.nz) * @version $Revision$ * @see PropositionalToMultiInstance */ public class MultiInstanceToPropositional extends Filter implements OptionHandler, UnsupervisedFilter, MultiInstanceCapabilitiesHandler { /** for serialization */ private static final long serialVersionUID = -4102847628883002530L; /** the total number of bags */ protected int m_NumBags = -1; /** Indices of string attributes in the bag */ protected StringLocator m_BagStringAtts = null; /** Indices of relational attributes in the bag */ protected RelationalLocator m_BagRelAtts = null; /** the total number of the propositional instance in the dataset */ protected int m_NumInstances; /** weight method: keep the weight to be the same as the original value */ public static final int WEIGHTMETHOD_ORIGINAL = 0; /** weight method: 1.0 */ public static final int WEIGHTMETHOD_1 = 1; /** weight method: 1.0 / Total # of prop. instance in the corresp. bag */ public static final int WEIGHTMETHOD_INVERSE1 = 2; /** * weight method: Total # of prop. instance / (Total # of bags * Total # of * prop. instance in the corresp. bag) */ public static final int WEIGHTMETHOD_INVERSE2 = 3; /** weight methods */ public static final Tag[] TAGS_WEIGHTMETHOD = { new Tag(WEIGHTMETHOD_ORIGINAL, "keep the weight to be the same as the original value"), new Tag(WEIGHTMETHOD_1, "1.0"), new Tag(WEIGHTMETHOD_INVERSE1, "1.0 / Total # of prop. instance in the corresp. bag"), new Tag(WEIGHTMETHOD_INVERSE2, "Total # of prop. instance / (Total # of bags * Total # of prop. instance in the corresp. bag)") }; /** the propositional instance weight setting method */ protected int m_WeightMethod = WEIGHTMETHOD_INVERSE2; /** * Returns an enumeration describing the available options * * @return an enumeration of all the available options */ @Override public Enumeration<Option> listOptions() { Vector<Option> result = new Vector<Option>(1); result.addElement(new Option("\tThe type of weight setting for each prop. instance:\n" + "\t0.weight = original single bag weight /Total number of\n" + "\tprop. instance in the corresponding bag;\n" + "\t1.weight = 1.0;\n" + "\t2.weight = 1.0/Total number of prop. instance in the \n" + "\t\tcorresponding bag; \n" + "\t3. weight = Total number of prop. instance / (Total number \n" + "\t\tof bags * Total number of prop. instance in the \n" + "\t\tcorresponding bag). \n" + "\t(default:0)", "A", 1, "-A <num>")); return result.elements(); } /** * <p>Parses a given list of options. * </p> * * <!-- options-start --> Valid options are: * * <pre> * -A <num> * The type of weight setting for each prop. instance: * 0.weight = original single bag weight /Total number of * prop. instance in the corresponding bag; * 1.weight = 1.0; * 2.weight = 1.0/Total number of prop. instance in the * corresponding bag; * 3. weight = Total number of prop. instance / (Total number * of bags * Total number of prop. instance in the * corresponding bag). * (default:0) * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { String weightString = Utils.getOption('A', options); if (weightString.length() != 0) { setWeightMethod(new SelectedTag(Integer.parseInt(weightString), TAGS_WEIGHTMETHOD)); } else { setWeightMethod(new SelectedTag(WEIGHTMETHOD_INVERSE2, TAGS_WEIGHTMETHOD)); } Utils.checkForRemainingOptions(options); } /** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { Vector<String> result = new Vector<String>(); result.add("-A"); result.add("" + m_WeightMethod); return result.toArray(new String[result.size()]); } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String weightMethodTipText() { return "The method used for weighting the instances."; } /** * The new method for weighting the instances. * * @param method the new method */ public void setWeightMethod(SelectedTag method) { if (method.getTags() == TAGS_WEIGHTMETHOD) { m_WeightMethod = method.getSelectedTag().getID(); } } /** * Returns the current weighting method for instances. * * @return the current weight method */ public SelectedTag getWeightMethod() { return new SelectedTag(m_WeightMethod, TAGS_WEIGHTMETHOD); } /** * Returns a string describing this filter * * @return a description of the filter suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Converts the multi-instance dataset into single instance dataset " + "so that the Nominalize, Standardize and other type of filters or transformation " + " can be applied to these data for the further preprocessing.\n" + "Note: the first attribute of the converted dataset is a nominal " + "attribute and refers to the bagId."; } /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.disableAllAttributes(); result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.RELATIONAL_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); // other result.enable(Capability.ONLY_MULTIINSTANCE); return result; } /** * Returns the capabilities of this multi-instance filter for the relational * data (i.e., the bags). * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getMultiInstanceCapabilities() { Capabilities result = new Capabilities(this); // attributes result.enableAllAttributes(); result.disable(Capability.RELATIONAL_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); // other result.setMinimumNumberInstances(0); return result; } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - * only the structure is required). * @return true if the outputFormat may be collected immediately * @throws Exception if the input format can't be set successfully */ @Override public boolean setInputFormat(Instances instanceInfo) throws Exception { if (instanceInfo.attribute(1).type() != Attribute.RELATIONAL) { throw new Exception("Can only handle relational-valued attribute!"); } super.setInputFormat(instanceInfo); Attribute classAttribute = (Attribute) instanceInfo.classAttribute().copy(); Attribute bagIndex = (Attribute) instanceInfo.attribute(0).copy(); // Indicator that batchFinished has not been called. m_NumBags = -1; /* create a new output format (propositional instance format) */ Instances tempData = instanceInfo.attribute(1).relation().stringFreeStructure(); ArrayList<Attribute> atts = new ArrayList<Attribute>(tempData.numAttributes()); for (int i = 0; i < tempData.numAttributes(); i++) { atts.add(tempData.attribute(i) .copy(instanceInfo.attribute(1).name() + "_" + tempData.attribute(i).name())); } Instances newData = new Instances(tempData.relationName(), atts, 0); newData.insertAttributeAt(bagIndex, 0); newData.insertAttributeAt(classAttribute, newData.numAttributes()); newData.setClassIndex(newData.numAttributes() - 1); super.setOutputFormat(newData.stringFreeStructure()); m_BagStringAtts = new StringLocator(instanceInfo.attribute(1).relation().stringFreeStructure()); m_BagRelAtts = new RelationalLocator(instanceInfo.attribute(1).relation().stringFreeStructure()); return true; } /** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input format has been set. */ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } // Has batchFinished() been called? if (m_NumBags != -1) { convertInstance(instance); return true; } else { bufferInput(instance); return false; } } /** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } // Has batchFinished been called before? if (m_NumBags == -1) { Instances input = getInputFormat(); m_NumBags = input.numInstances(); m_NumInstances = 0; for (int i = 0; i < m_NumBags; i++) { if (input.instance(i).relationalValue(1) == null) { m_NumInstances++; } else { int nn = input.instance(i).relationalValue(1).numInstances(); m_NumInstances += nn; } } // Convert pending input instances for (int i = 0; i < input.numInstances(); i++) { convertInstance(input.instance(i)); } // Free memory flushInput(); } m_NewBatch = true; return (numPendingOutput() != 0); } /** * Convert a single bag over. The converted instances is added to the end of * the output queue. * * @param bag the bag to convert */ private void convertInstance(Instance bag) { Instances data = bag.relationalValue(1); int bagSize = 1; if (data != null) { bagSize = data.numInstances(); } double bagIndex = bag.value(0); double classValue = bag.classValue(); double weight = 0.0; // the proper weight for each instance in a bag if (m_WeightMethod == WEIGHTMETHOD_1) { weight = 1.0; } else if (m_WeightMethod == WEIGHTMETHOD_INVERSE1) { weight = 1.0 / bagSize; } else if (m_WeightMethod == WEIGHTMETHOD_INVERSE2) { weight = (double) m_NumInstances / (m_NumBags * bagSize); } else { weight = bag.weight() / bagSize; } Instance newInst; Instances outputFormat = getOutputFormat().stringFreeStructure(); for (int i = 0; i < bagSize; i++) { newInst = new DenseInstance(outputFormat.numAttributes()); newInst.setDataset(outputFormat); newInst.setValue(0, bagIndex); if (!bag.classIsMissing()) { newInst.setClassValue(classValue); } // copy the attribute values to new instance for (int j = 1; j < outputFormat.numAttributes() - 1; j++) { if (data == null) { newInst.setMissing(j); } else { newInst.setValue(j, data.instance(i).value(j - 1)); } } newInst.setWeight(weight); // copy strings/relational values StringLocator.copyStringValues(newInst, false, data, m_BagStringAtts, outputFormat, m_OutputStringAtts); RelationalLocator.copyRelationalValues(newInst, false, data, m_BagRelAtts, outputFormat, m_OutputRelAtts); push(newInst); } } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision$"); } /** * Main method for running this filter. * * @param args should contain arguments to the filter: use -h for help */ public static void main(String[] args) { runFilter(new MultiInstanceToPropositional(), args); } }