Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package pathwaynet; import java.util.HashMap; import java.util.Map; import java.util.HashSet; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.function.Predicate; import pathwaynet.annotation.Pathway; import pathwaynet.annotation.Enzyme; import pathwaynet.clustering.HierarchicalClusterer; import edu.uci.ics.jung.algorithms.shortestpath.DijkstraDistance; import edu.uci.ics.jung.graph.Graph; import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; import org.apache.commons.math3.stat.inference.MannWhitneyUTest; import org.apache.commons.math3.stat.descriptive.moment.Mean; import org.apache.commons.math3.stat.descriptive.rank.Median; /** * * @author Chih-sung */ public class PathwayCalculator { private Pathway pathway; private int numPermutations; private boolean cache; public final static int PERMUTATION_BASED = 1; public final static int WILCOXON_BASED = 2; public PathwayCalculator(Pathway pathway, int numPermutations, boolean cache) { this.pathway = pathway; this.numPermutations = numPermutations; this.cache = cache; } public PathwayCalculator(Pathway pathway, int numPermutations) { this(pathway, numPermutations, false); } public PathwayCalculator(Pathway pathway) { this(pathway, 100, false); } public void setNumPermutations(int num) { numPermutations = num; } public int getNumPermutations() { return numPermutations; } public void setCache(boolean cache) { this.cache = cache; } public boolean cached() { return cache; } private <T> HashMap<T, Map<T, Number>> getDistancesWithGroup(HashMap<T, Map<T, Number>> distancesMap, Collection<T> componentsInGroup, Collection<T> componentsConsidered, boolean onlyFromSource, boolean insideGroup) { // get the in-group set and out-group set of enzymes HashSet<T> componentsOutsideGroupAvailable = new HashSet<>(); componentsOutsideGroupAvailable.addAll(distancesMap.keySet()); componentsOutsideGroupAvailable.retainAll(componentsConsidered); componentsOutsideGroupAvailable.removeAll(componentsInGroup); HashSet<T> componentsInGroupAvailable = new HashSet<>(); componentsInGroupAvailable.addAll(distancesMap.keySet()); componentsInGroupAvailable.retainAll(componentsConsidered); componentsInGroupAvailable.removeAll(componentsOutsideGroupAvailable); // obtain distance HashMap<T, Map<T, Number>> distancesFromGroup = new HashMap<>(); if (insideGroup && componentsInGroupAvailable.size() < 2) System.err.println("WARNING: Fewer than TWO given components are involved in the pathway."); else if ((!insideGroup) && (componentsInGroupAvailable.isEmpty() || componentsOutsideGroupAvailable.isEmpty())) System.err.println( "WARNING: There is either no or full overlap between the given components and the ones involving in the pathway."); else { componentsInGroupAvailable.stream().forEach((component1) -> { distancesFromGroup.put(component1, new HashMap<>()); distancesMap.keySet().stream().forEach((component2) -> { Number minDist = getShortestDistance(distancesMap, component1, component2, onlyFromSource); if (insideGroup && (componentsInGroupAvailable.contains(component2) && (!component1.equals(component2))) && minDist != null) { distancesFromGroup.get(component1).put(component2, minDist); } else if ((!insideGroup) && componentsOutsideGroupAvailable.contains(component2) && minDist != null) { distancesFromGroup.get(component1).put(component2, minDist); } }); //System.err.println(component1 + "\t" + componentsInGroupAvailable.size() +"\t" + componentsOutsideGroupAvailable.size() + "\t" + distancesFromGroup.get(component1).values()); }); } return distancesFromGroup; } private <E> TestResultForList testForGivenListWilcoxBased(Graph<E, String> graph, Collection<E> componentsInGroup, Collection<E> componentsConsidered, boolean onlyFromSource) { double p = Double.NaN; double meanInGroup = Double.NaN; double meanOutGroup = Double.NaN; // calculate and cache all distances DijkstraDistance<E, String> distances = new DijkstraDistance<>(graph); HashMap<E, Map<E, Number>> distancesMap = new HashMap<>(); graph.getVertices().stream().forEach((component) -> { Map<E, Number> distancesFromThis = distances.getDistanceMap(component); distancesMap.put(component, distancesFromThis); }); // generate in-group list and out-group list HashSet<E> componentsInGroupAvailable = new HashSet<>(); HashSet<E> componentsOutGroupAvailable = new HashSet<>(); componentsInGroupAvailable.addAll(graph.getVertices()); componentsOutGroupAvailable.addAll(graph.getVertices()); componentsInGroupAvailable.retainAll(componentsConsidered); componentsOutGroupAvailable.retainAll(componentsConsidered); componentsInGroupAvailable.retainAll(componentsInGroup); componentsOutGroupAvailable.removeAll(componentsInGroup); // store within-group distances for the given list, as well as distances between components in the list and ones out of the list ArrayList<Double> distInGroup = new ArrayList<>(); ArrayList<Double> distOutGroup = new ArrayList<>(); componentsInGroupAvailable.forEach((component1) -> { componentsInGroupAvailable.forEach((component2) -> { Number minDist = getShortestDistance(distancesMap, component1, component2, onlyFromSource); if (!component1.equals(component2) && minDist != null) { distInGroup.add(minDist.doubleValue()); } }); componentsOutGroupAvailable.forEach((component2) -> { Number minDist = getShortestDistance(distancesMap, component1, component2, onlyFromSource); if (!component1.equals(component2) && minDist != null) { distOutGroup.add(minDist.doubleValue()); } }); }); if (distInGroup.size() >= 2 && distOutGroup.size() >= 2) { double[] distInGroupVector = new double[distInGroup.size()]; double[] distOutGroupVector = new double[distOutGroup.size()]; for (int i = 0; i < distInGroup.size(); i++) distInGroupVector[i] = distInGroup.get(i); for (int i = 0; i < distOutGroup.size(); i++) distOutGroupVector[i] = distOutGroup.get(i); p = new MannWhitneyUTest().mannWhitneyUTest(distInGroupVector, distOutGroupVector); //System.err.println(distInGroup+"\t"+distOutGroup); meanInGroup = new Mean().evaluate(distInGroupVector); meanOutGroup = new Mean().evaluate(distOutGroupVector); } else { System.err.println("Error: please check your list: too few components involving in the pathway"); } return new TestResultForList(p, meanInGroup, meanOutGroup); } public TestResultForList testForGivenListWilcoxBased(Collection<Enzyme> enzymesInGroup, Collection<Enzyme> enzymesConsidered, boolean onlyFromSource) { Graph<Enzyme, String> graph = pathway.generateEnzymeGraph(); return PathwayCalculator.this.testForGivenListWilcoxBased(graph, enzymesInGroup, enzymesConsidered, onlyFromSource); } public TestResultForList testForGivenListWilcoxBased(Collection<Enzyme> enzymesInGroup, boolean onlyFromSource) { return PathwayCalculator.this.testForGivenListWilcoxBased(enzymesInGroup, pathway.getEnzymes(), onlyFromSource); } private <E> TestResultForList testForGivenListPermutationBased(Graph<E, String> graph, Collection<E> componentsInGroup, Collection<E> componentsConsidered, boolean onlyFromSource) { // calculate and cache all distances DijkstraDistance<E, String> distances = new DijkstraDistance<>(graph); HashMap<E, Map<E, Number>> distancesMap = new HashMap<>(); graph.getVertices().stream().forEach((component) -> { Map<E, Number> distancesFromThis = distances.getDistanceMap(component); distancesMap.put(component, distancesFromThis); }); // generate in-group list and all-available list HashSet<E> componentsInGroupAvailable = new HashSet<>(); componentsInGroupAvailable.addAll(graph.getVertices()); componentsInGroupAvailable.retainAll(componentsConsidered); componentsInGroupAvailable.retainAll(componentsInGroup); HashSet<E> componentsAvailable = new HashSet<>(); componentsAvailable.addAll(graph.getVertices()); componentsAvailable.retainAll(componentsConsidered); // store within-group distances for the given list if (componentsInGroupAvailable.size() < 2 || componentsInGroupAvailable.size() > componentsAvailable.size() - 2) { System.err.println( "ERROE! Please check your component list: too few or too many components in the list."); return new TestResultForList(Double.NaN, Double.NaN, Double.NaN); } ArrayList<Double> distInGroup = new ArrayList<>(); componentsInGroupAvailable.forEach((component1) -> { componentsInGroupAvailable.forEach((component2) -> { Number minDist = getShortestDistance(distancesMap, component1, component2, onlyFromSource); if (!component1.equals(component2) && minDist != null) { distInGroup.add(minDist.doubleValue()); } }); }); double[] distInGroupVector = new double[distInGroup.size()]; for (int i = 0; i < distInGroup.size(); i++) distInGroupVector[i] = distInGroup.get(i); double meanInGroup = new Mean().evaluate(distInGroupVector); // permutations ArrayList<Double> meansInPermutGroups = new ArrayList<>(); ArrayList<HashSet<E>> permutatedGroups = generatePermutatedGroups(componentsAvailable, componentsInGroupAvailable.size()); permutatedGroups.forEach((componentsInPermutatedGroup) -> { ArrayList<Double> distInPermutGroup = new ArrayList<>(); componentsInPermutatedGroup.forEach((component1) -> { componentsInPermutatedGroup.forEach((component2) -> { Number minDist = getShortestDistance(distancesMap, component1, component2, onlyFromSource); if (!component1.equals(component2) && minDist != null) { distInPermutGroup.add(minDist.doubleValue()); } }); }); double[] distInPermutGroupVector = new double[distInPermutGroup.size()]; for (int i = 0; i < distInPermutGroup.size(); i++) distInPermutGroupVector[i] = distInPermutGroup.get(i); meansInPermutGroups.add(new Mean().evaluate(distInPermutGroupVector)); }); double[] meansInPermutGroupsVector = new double[meansInPermutGroups.size()]; for (int i = 0; i < meansInPermutGroups.size(); i++) meansInPermutGroupsVector[i] = meansInPermutGroups.get(i); double medianPermut = new Median().evaluate(meansInPermutGroupsVector); double p = ((double) meansInPermutGroups.stream().filter(notGreaterThan(meanInGroup)).count()) / meansInPermutGroups.size(); return new TestResultForList(p, meanInGroup, medianPermut); } public TestResultForList testForGivenListPermutationBased(Collection<Enzyme> enzymesInGroup, Collection<Enzyme> enzymesConsidered, boolean onlyFromSource) { Graph<Enzyme, String> graph = pathway.generateEnzymeGraph(); return PathwayCalculator.this.testForGivenListPermutationBased(graph, enzymesInGroup, enzymesConsidered, onlyFromSource); } public TestResultForList testForGivenListPermutationBased(Collection<Enzyme> enzymesInGroup, boolean onlyFromSource) { return PathwayCalculator.this.testForGivenListPermutationBased(enzymesInGroup, pathway.getEnzymes(), onlyFromSource); } private <E> void testForGivenListWithClustering(Graph<E, String> graph, Collection<E> componentsInGroup, Collection<E> componentsConsidered, int clusteringMethod, int test) { HashSet<E> componentsInGroupAvailable = new HashSet<>(); componentsInGroupAvailable.addAll(componentsInGroup); componentsInGroupAvailable.retainAll(componentsConsidered); HierarchicalClusterer<E> clusterer = new HierarchicalClusterer(graph, componentsInGroupAvailable, clusteringMethod); ArrayList<ArrayList<E>> clusters = clusterer.getReasonableNumberOfClusters(); ArrayList<TestResultForList> testResults = new ArrayList<>(); clusters.forEach((componentsInCluster) -> { if (componentsInCluster.size() >= 2) { if (test == PERMUTATION_BASED) testResults.add(PathwayCalculator.this.testForGivenListPermutationBased(graph, componentsInCluster, componentsConsidered, false)); else if (test == WILCOXON_BASED) testResults.add(PathwayCalculator.this.testForGivenListWilcoxBased(graph, componentsInCluster, componentsConsidered, false)); else testResults.add(null); } else testResults.add(null); }); for (int i = 0; i < testResults.size(); i++) { if (testResults.get(i) != null) System.out.println(clusters.get(i) + "\t" + testResults.get(i)); } } public <E> void testForGivenListWithClustering(Collection<Enzyme> enzymesInGroup, Collection<Enzyme> enzymesConsidered, int clusteringMethod, int test) { Graph<Enzyme, String> graph = pathway.generateEnzymeGraph(); PathwayCalculator.this.testForGivenListWithClustering(graph, enzymesInGroup, enzymesConsidered, clusteringMethod, test); } public <E> void testForGivenListWithClustering(Collection<Enzyme> enzymesInGroup, int clusteringMethod, int test) { PathwayCalculator.this.testForGivenListWithClustering(enzymesInGroup, pathway.getEnzymes(), clusteringMethod, test); } public <E> void testForGivenListWithClustering(Collection<Enzyme> enzymesInGroup, int test) { PathwayCalculator.this.testForGivenListWithClustering(enzymesInGroup, pathwaynet.clustering.HierarchicalClusterer.SINGLE_LINKAGE, test); } private <E> HashMap<E, TestResultForEachVertex> testForEachComponent(Graph<E, String> graph, Collection<E> componentsInGroup, Collection<E> componentsConsidered, boolean onlyFromSource) { HashMap<E, TestResultForEachVertex> significance = new HashMap<>(); // calculate and cache all distances DijkstraDistance<E, String> distances = new DijkstraDistance<>(graph); HashMap<E, Map<E, Number>> distancesMap = new HashMap<>(); graph.getVertices().stream().forEach((component) -> { Map<E, Number> distancesFromThis = distances.getDistanceMap(component); distancesMap.put(component, distancesFromThis); }); // calculate real in-group and out-group distances HashMap<E, Map<E, Number>> distancesInsideGroup = getDistancesWithGroup(distancesMap, componentsInGroup, componentsConsidered, onlyFromSource, true); HashMap<E, Map<E, Number>> distancesOutsideGroup = getDistancesWithGroup(distancesMap, componentsInGroup, componentsConsidered, onlyFromSource, false); if (distancesInsideGroup.isEmpty() || distancesOutsideGroup.isEmpty()) { System.err.println("WARNING: Please double check the enzyme list!"); } else { HashMap<E, ArrayList<Double>> differencesProp = new HashMap<>(); distancesInsideGroup.keySet().stream().forEach((component) -> { ArrayList<Double> diffIncreaseProp = estimateDifferenceOfProportionAtDistances( distancesInsideGroup.get(component).values(), distancesOutsideGroup.get(component).values()); differencesProp.put(component, diffIncreaseProp); //System.err.println(enzyme.getID()+"\t"+diffIncreaseProp); }); // for each enzyme in the given group, estimate its significance of neighbor enrichment of enzymes in the group //System.err.println(); distancesInsideGroup.keySet().stream().forEach((component) -> { // do permutation (for numPermutations times) to generate random group with the same size and with this enzyme HashSet<E> allComponentsAvailable = new HashSet<>(); allComponentsAvailable.addAll(graph.getVertices()); allComponentsAvailable.retainAll(componentsConsidered); ArrayList<HashSet<E>> componentsInGroupPermutations = generatePermutatedGroupsWithFixedNode( component, allComponentsAvailable, distancesInsideGroup.size()); // for each permutation, calculate the differences of proportion between within-group and between-group path at each path length ArrayList<ArrayList<Double>> differencesPropPermutations = new ArrayList<>(); componentsInGroupPermutations.stream().forEach((componentsInGroupThisPermutation) -> { HashSet<E> componentsOutGroupThisPermutation = new HashSet<>(); componentsOutGroupThisPermutation.addAll(graph.getVertices()); componentsOutGroupThisPermutation.removeAll(componentsInGroupThisPermutation); HashMap<E, Number> distancesInPermut = new HashMap<>(); HashMap<E, Number> distancesOutPermut = new HashMap<>(); allComponentsAvailable.forEach((component2) -> { Number minDist = getShortestDistance(distancesMap, component, component2, onlyFromSource); if (componentsInGroupThisPermutation.contains(component2) && (!component.equals(component2)) && minDist != null) distancesInPermut.put(component2, minDist); else if (componentsOutGroupThisPermutation.contains(component2) && minDist != null) distancesOutPermut.put(component2, minDist); }); differencesPropPermutations.add(estimateDifferenceOfProportionAtDistances( distancesInPermut.values(), distancesOutPermut.values())); }); // calculate the significance // P: based on Pearson's correlation between differences of proportions and distances // domain: based on the quantile of difference at each distance //System.err.println(component); double p = calculatePValue(differencesProp.get(component), differencesPropPermutations); int radius = estimateDomainRadius(differencesProp.get(component), differencesPropPermutations, 0.9); significance.put(component, new TestResultForEachVertex(p, radius)); if (cache) { } }); } return significance; } public HashMap<Enzyme, TestResultForEachVertex> testForEachComponent(Collection<Enzyme> enzymesInGroup, Collection<Enzyme> enzymesConsidered, boolean onlyFromSource) { Graph<Enzyme, String> graph = pathway.generateEnzymeGraph(); return PathwayCalculator.this.testForEachComponent(graph, enzymesInGroup, enzymesConsidered, onlyFromSource); } public HashMap<Enzyme, TestResultForEachVertex> testForEachComponent(Collection<Enzyme> enzymesInGroup, boolean onlyFromSource) { return PathwayCalculator.this.testForEachComponent(enzymesInGroup, pathway.getEnzymes(), onlyFromSource); } private <T> ArrayList<HashSet<T>> generatePermutatedGroupsWithFixedNode(T thisComponent, Collection<T> allComponents, int groupSize) { ArrayList<HashSet<T>> componentsInGroupPermutations = new ArrayList<>(); for (int i = 0; i < numPermutations; i++) { HashSet<T> componentsThisPermut = new HashSet<>(); componentsThisPermut.add(thisComponent); ArrayList<T> componentsInPathway = new ArrayList<>(); componentsInPathway.addAll(allComponents); componentsInPathway.remove(thisComponent); Collections.shuffle(componentsInPathway); componentsThisPermut.addAll(componentsInPathway.subList(0, groupSize - 1)); componentsInGroupPermutations.add(componentsThisPermut); } return componentsInGroupPermutations; } private <T> ArrayList<HashSet<T>> generatePermutatedGroups(Collection<T> allComponents, int groupSize) { ArrayList<HashSet<T>> componentsInGroupPermutations = new ArrayList<>(); for (int i = 0; i < numPermutations; i++) { HashSet<T> componentsThisPermut = new HashSet<>(); ArrayList<T> componentsInPathway = new ArrayList<>(); componentsInPathway.addAll(allComponents); Collections.shuffle(componentsInPathway); componentsThisPermut.addAll(componentsInPathway.subList(0, groupSize)); componentsInGroupPermutations.add(componentsThisPermut); } return componentsInGroupPermutations; } private double calculatePValue(ArrayList<Double> differencesPropReal, ArrayList<ArrayList<Double>> differencesPropPermut) { double[] differencesPropVector = new double[differencesPropReal.size()]; double[] distanceThreshold = new double[differencesPropReal.size()]; for (int i = 0; i < differencesPropReal.size(); i++) { differencesPropVector[i] = differencesPropReal.get(i); distanceThreshold[i] = i + 1; } Double corrReal = new PearsonsCorrelation().correlation(differencesPropVector, distanceThreshold); //System.err.println(corrReal); ArrayList<Double> corrPermut = new ArrayList<>(); differencesPropPermut.stream().forEach((differencesPropThisPermut) -> { double[] differencesPropVectorPermut = new double[differencesPropReal.size()]; for (int i = 0; i < differencesPropReal.size(); i++) { if (differencesPropThisPermut.size() > i) differencesPropVectorPermut[i] = differencesPropThisPermut.get(i); else differencesPropVectorPermut[i] = 0; } corrPermut.add(new PearsonsCorrelation().correlation(differencesPropVectorPermut, distanceThreshold)); }); long numPermutNotLargerThanReal = corrPermut.stream().filter(notGreaterThan(corrReal)).count(); double p = ((double) numPermutNotLargerThanReal) / differencesPropPermut.size(); return p; } private int estimateDomainRadius(ArrayList<Double> differencesPropReal, ArrayList<ArrayList<Double>> differencesPropPermut, double quantileCutoff) { double[] quantiles = new double[differencesPropReal.size()]; for (int i = 0; i < differencesPropReal.size(); i++) { double realDiff = differencesPropReal.get(i); int numSmallerThanReal = 0; for (int j = 0; j < differencesPropPermut.size(); j++) { double permutDiff = differencesPropPermut.get(j).size() > i ? differencesPropPermut.get(j).get(i) : 0; if (permutDiff < realDiff) numSmallerThanReal++; } quantiles[i] = ((double) numSmallerThanReal) / differencesPropPermut.size(); } //System.err.println(Arrays.toString(quantiles)); int radius = 0; for (int i = 0; i < quantiles.length; i++) { if (quantiles[i] > quantileCutoff && (i + 1) > radius) radius = i + 1; } return radius; } private ArrayList<Double> estimateDifferenceOfProportionAtDistances(Collection<Number> distancesIn, Collection<Number> distancesOut) { int maxDistanceIn = distancesIn.isEmpty() ? 0 : distancesIn.stream().max((Number o1, Number o2) -> { if (o1 == null && o2 == null) return 0; if (o1 == null) return -1; if (o2 == null) return 1; return o1.intValue() - o2.intValue(); }).get().intValue(); int maxDistanceOut = distancesOut.isEmpty() ? 0 : distancesOut.stream().max((Number o1, Number o2) -> { if (o1 == null && o2 == null) return 0; if (o1 == null) return -1; if (o2 == null) return 1; return o1.intValue() - o2.intValue(); }).get().intValue(); int maxDistance = maxDistanceIn > maxDistanceOut ? maxDistanceIn : maxDistanceOut; ArrayList<Double> differences = new ArrayList<>(); for (int i = 1; i <= maxDistance; i++) { double propWithDistIn = distancesIn.isEmpty() ? 0 : ((double) distancesIn.stream().filter(equalsTo(i)).count()) / distancesIn.size(); double propWithDistOut = distancesOut.isEmpty() ? 0 : ((double) distancesOut.stream().filter(equalsTo(i)).count()) / distancesOut.size(); differences.add(propWithDistIn - propWithDistOut); } return differences; } private Predicate<Number> equalsTo(int i) { return p -> p != null && p.intValue() == i; } private Predicate<Double> notGreaterThan(Double x) { return p -> x.toString().equals("NaN") || ((!p.toString().equals("NaN")) && p <= x); } private Predicate<Double> notLessThan(Double x) { return p -> p >= x; } private <T> Number getShortestDistance(HashMap<T, Map<T, Number>> distancesMap, T component1, T component2, boolean onlyFromSource) { Number minDist = null; if (onlyFromSource && distancesMap.get(component1).containsKey(component2)) { minDist = distancesMap.get(component1).get(component2); } else if ((!onlyFromSource) && distancesMap.get(component1).containsKey(component2) && distancesMap.get(component2).containsKey(component1)) { minDist = distancesMap.get(component1).get(component2).doubleValue() > distancesMap.get(component2) .get(component1).doubleValue() ? distancesMap.get(component2).get(component1) : distancesMap.get(component1).get(component2); } else if ((!onlyFromSource) && distancesMap.get(component1).containsKey(component2)) { minDist = distancesMap.get(component1).get(component2); } else if ((!onlyFromSource) && distancesMap.get(component2).containsKey(component1)) { minDist = distancesMap.get(component2).get(component1); } return minDist; } }