Java tutorial
/** * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package us.levk.math.linear; import static java.lang.Double.isInfinite; import static java.lang.Double.isNaN; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Setter; import lombok.ToString; import lombok.experimental.Accessors; import lombok.extern.log4j.Log4j; import org.apache.commons.math3.linear.RealMatrix; /** * @author levk * */ @Log4j public class EucledianDistanceClusterer { private Iterator<Integer> enumerator = new Iterator<Integer>() { private int counter = -1; @Override public boolean hasNext() { return true; } @Override public Integer next() { counter--; if (counter > 0) counter = -1; return counter; } @Override public void remove() { throw new UnsupportedOperationException(); } }; @RequiredArgsConstructor @Accessors(fluent = true) @ToString public class Cluster { private @Getter final int id; private @Getter @Setter double d = 0.0; private @Getter final List<Integer> contains = new ArrayList<>(); private @Getter final Cluster[] children; public Cluster(int id) { this.id = id; contains().add(id); children = null; } public Cluster(Cluster left, Cluster right) { id = enumerator.next(); children = new Cluster[] { left, right }; } } public Cluster eucledian(final RealMatrix original) throws IOException { try (HugeRealMatrix distances = new HugeRealMatrix(original.getRowDimension(), original.getRowDimension())) { final Map<Integer, Cluster> genehash = new HashMap<Integer, Cluster>() { private static final long serialVersionUID = 1L; { for (int index = original.getRowDimension(); --index >= 0; put(index, new Cluster(index))) ; } }; TreeMap<Double, int[]> sorted = new TreeMap<>(); log.debug("Populating distance matrix"); for (int i = 0; i < original.getRowDimension(); i++) { for (int j = i + 1; j < original.getRowDimension(); j++) { // Euclidean distance calculation. double total = 0; for (int k = 0; k < original.getColumnDimension(); k++) { double left = original.getEntry(i, k); double right = original.getEntry(j, k); if (!isNaN(left) && !isNaN(right) && !isInfinite(left) && !isInfinite(right)) total += Math.pow(left - right, 2); } double distance = Math.pow(total, 0.5); distances.setEntry(i, j, distance); distances.setEntry(j, i, distance); int[] genePair = { i, j }; // Enter the distance calculated and the genes measured into a // treemap. Will be automatically sorted. sorted.put(distance, genePair); } } log.debug("Initialized distances matrix " + distances); while (true) { // Get the first key of the TreeMap. Will be the shortest distance de // facto. final double minkey = (Double) sorted.firstKey(); int[] minValues = (int[]) sorted.firstEntry().getValue(); final int value1 = minValues[0], value2 = minValues[1]; // find Cluster cluster = new Cluster(genehash.get(value1), genehash.get(value2)) { { log.debug("Generating cluster from " + value1 + " and " + value2 + " in " + genehash); contains().addAll(genehash.get(value1).contains()); contains().addAll(genehash.get(value2).contains()); d(minkey); log.debug("Generated cluster " + this); } }; genehash.put(cluster.id(), cluster); genehash.remove(value1); genehash.remove(value2); if (genehash.size() <= 1) break; // Iterate over all the current clusters to remeasure distance with the // previously clustered group. for (Cluster c : genehash.values()) { // Skip measuring the new cluster with itself. if (c == cluster) continue; double distance = 0; int n = 0; // Get genes from each cluster. Distance is measured from each element // to every element. for (int current : c.contains()) for (int created : cluster.contains()) { distance += distances.getEntry(current, created); n++; } distance = distance / n; int[] valuePair = { c.id(), cluster.id() }; sorted.put(distance, valuePair); } // Get the shortest distance. // Check to make sure shortest distance does not include a gene pair // that // has already had its elements clustered. boolean minimized = false; while (!minimized) { double mk = sorted.firstKey(); minValues = sorted.firstEntry().getValue(); // If the gene pair is not present in the current gene set, remove // this // distance. if (!genehash.containsKey(minValues[0]) || !genehash.containsKey(minValues[1])) sorted.remove(mk); else minimized = true; } } return genehash.entrySet().iterator().next().getValue(); } } }