Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * LinearNNSearch.java * Copyright (C) 1999-2012 University of Waikato */ package weka.core.neighboursearch; import java.util.Collections; import java.util.Enumeration; import java.util.Vector; import weka.core.Instance; import weka.core.Instances; import weka.core.Option; import weka.core.RevisionUtils; import weka.core.Utils; /** <!-- globalinfo-start --> * Class implementing the brute force search algorithm for nearest neighbour search. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S * Skip identical instances (distances equal to zero). * </pre> * <!-- options-end --> * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision$ */ public class LinearNNSearch extends NearestNeighbourSearch { /** for serialization. */ private static final long serialVersionUID = 1915484723703917241L; /** Array holding the distances of the nearest neighbours. It is filled up * both by nearestNeighbour() and kNearestNeighbours(). */ protected double[] m_Distances; /** Whether to skip instances from the neighbours that are identical to the query instance. */ protected boolean m_SkipIdentical = false; /** * Constructor. Needs setInstances(Instances) * to be called before the class is usable. */ public LinearNNSearch() { super(); } /** * Constructor that uses the supplied set of * instances. * * @param insts the instances to use */ public LinearNNSearch(Instances insts) { super(insts); m_DistanceFunction.setInstances(insts); } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class implementing the brute force search algorithm for nearest " + "neighbour search."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration<Option> listOptions() { Vector<Option> result = new Vector<Option>(); result.add(new Option("\tSkip identical instances (distances equal to zero).\n", "S", 1, "-S")); result.addAll(Collections.list(super.listOptions())); return result.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S * Skip identical instances (distances equal to zero). * </pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { super.setOptions(options); setSkipIdentical(Utils.getFlag('S', options)); Utils.checkForRemainingOptions(options); } /** * Gets the current settings. * * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions() { Vector<String> result = new Vector<String>(); Collections.addAll(result, super.getOptions()); if (getSkipIdentical()) result.add("-S"); return result.toArray(new String[result.size()]); } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String skipIdenticalTipText() { return "Whether to skip identical instances (with distance 0 to the target)"; } /** * Sets the property to skip identical instances (with distance zero from * the target) from the set of neighbours returned. * * @param skip if true, identical intances are skipped */ public void setSkipIdentical(boolean skip) { m_SkipIdentical = skip; } /** * Gets whether if identical instances are skipped from the neighbourhood. * * @return true if identical instances are skipped */ public boolean getSkipIdentical() { return m_SkipIdentical; } /** * Returns the nearest instance in the current neighbourhood to the supplied * instance. * * @param target The instance to find the nearest neighbour for. * @return the nearest instance * @throws Exception if the nearest neighbour could not be found. */ public Instance nearestNeighbour(Instance target) throws Exception { return (kNearestNeighbours(target, 1)).instance(0); } /** * Returns k nearest instances in the current neighbourhood to the supplied * instance. * * @param target The instance to find the k nearest neighbours for. * @param kNN The number of nearest neighbours to find. * @return the k nearest neighbors * @throws Exception if the neighbours could not be found. */ public Instances kNearestNeighbours(Instance target, int kNN) throws Exception { //debug boolean print = false; if (m_Stats != null) m_Stats.searchStart(); MyHeap heap = new MyHeap(kNN); double distance; int firstkNN = 0; for (int i = 0; i < m_Instances.numInstances(); i++) { if (target == m_Instances.instance(i)) //for hold-one-out cross-validation continue; if (m_Stats != null) m_Stats.incrPointCount(); if (firstkNN < kNN) { if (print) System.out.println("K(a): " + (heap.size() + heap.noOfKthNearest())); distance = m_DistanceFunction.distance(target, m_Instances.instance(i), Double.POSITIVE_INFINITY, m_Stats); // Third condition in the following test is used because at least one nearest neighbour is needed if (distance == 0.0 && m_SkipIdentical && (i < m_Instances.numInstances() - 1)) continue; heap.put(i, distance); firstkNN++; } else { MyHeapElement temp = heap.peek(); if (print) System.out.println("K(b): " + (heap.size() + heap.noOfKthNearest())); distance = m_DistanceFunction.distance(target, m_Instances.instance(i), temp.distance, m_Stats); if (distance == 0.0 && m_SkipIdentical) continue; if (distance < temp.distance) { heap.putBySubstitute(i, distance); } else if (distance == temp.distance) { heap.putKthNearest(i, distance); } } } Instances neighbours = new Instances(m_Instances, (heap.size() + heap.noOfKthNearest())); m_Distances = new double[heap.size() + heap.noOfKthNearest()]; int[] indices = new int[heap.size() + heap.noOfKthNearest()]; int i = 1; MyHeapElement h; while (heap.noOfKthNearest() > 0) { h = heap.getKthNearest(); indices[indices.length - i] = h.index; m_Distances[indices.length - i] = h.distance; i++; } while (heap.size() > 0) { h = heap.get(); indices[indices.length - i] = h.index; m_Distances[indices.length - i] = h.distance; i++; } m_DistanceFunction.postProcessDistances(m_Distances); for (int k = 0; k < indices.length; k++) { neighbours.add(m_Instances.instance(indices[k])); } if (m_Stats != null) m_Stats.searchFinish(); return neighbours; } /** * Returns the distances of the k nearest neighbours. The kNearestNeighbours * or nearestNeighbour must always be called before calling this function. If * this function is called before calling either the kNearestNeighbours or * the nearestNeighbour, then it throws an exception. If, however, if either * of the nearestNeighbour functions are called at any point in the * past then no exception is thrown and the distances of the training set from * the last supplied target instance (to either one of the nearestNeighbour * functions) is/are returned. * * @return array containing the distances of the * nearestNeighbours. The length and ordering of the * array is the same as that of the instances returned * by nearestNeighbour functions. * @throws Exception if called before calling kNearestNeighbours * or nearestNeighbours. */ public double[] getDistances() throws Exception { if (m_Distances == null) throw new Exception("No distances available. Please call either " + "kNearestNeighbours or nearestNeighbours first."); return m_Distances; } /** * Sets the instances comprising the current neighbourhood. * * @param insts The set of instances on which the nearest neighbour * search is carried out. Usually this set is the * training set. * @throws Exception if setting of instances fails */ public void setInstances(Instances insts) throws Exception { m_Instances = insts; m_DistanceFunction.setInstances(insts); } /** * Updates the LinearNNSearch to cater for the new added instance. This * implementation only updates the ranges of the DistanceFunction class, * since our set of instances is passed by reference and should already have * the newly added instance. * * @param ins The instance to add. Usually this is the instance that * is added to our neighbourhood i.e. the training * instances. * @throws Exception if the given instances are null */ public void update(Instance ins) throws Exception { if (m_Instances == null) throw new Exception( "No instances supplied yet. Cannot update without" + "supplying a set of instances first."); m_DistanceFunction.update(ins); } /** * Adds the given instance info. This implementation updates the range * datastructures of the DistanceFunction class. * * @param ins The instance to add the information of. Usually this is * the test instance supplied to update the range of * attributes in the distance function. */ public void addInstanceInfo(Instance ins) { if (m_Instances != null) try { update(ins); } catch (Exception ex) { ex.printStackTrace(); } } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision$"); } }