org.broad.igv.feature.FeatureUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.broad.igv.feature.FeatureUtils.java

Source

/*
 * Copyright (c) 2007-2012 The Broad Institute, Inc.
 * SOFTWARE COPYRIGHT NOTICE
 * This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
 *
 * This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
 * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 */

/*
 * FeatureUtils.java
 *
 * Useful utilities for working with Features
 */
package org.broad.igv.feature;

import org.apache.commons.collections.Predicate;
import org.broad.tribble.Feature;

import java.util.*;

/**
 * Static helpers for partitioning, sorting, merging and searching lists of
 * {@link Feature}s.
 * <p/>
 * The search methods assume the supplied list is sorted by ascending start
 * position (see {@link #sortFeatureList(List)}); none of them verify this.
 *
 * @author jrobinso
 */
public class FeatureUtils {

    /**
     * Build a predicate that accepts features lying on {@code chr} whose
     * start/end range touches the query range. Both bounds are compared
     * inclusively.
     *
     * @param chr   chromosome name a feature must match exactly; must not be null
     * @param start start of the query range
     * @param end   end of the query range
     * @return predicate that is true for features overlapping the query range
     */
    public static Predicate<Feature> getOverlapPredicate(final String chr, final int start, final int end) {
        return new Predicate<Feature>() {
            @Override
            public boolean evaluate(Feature object) {
                return chr.equals(object.getChr()) && object.getStart() <= end && object.getEnd() >= start;
            }
        };
    }

    /**
     * Group features by the value of {@code getChr()}.
     *
     * @param features features to group
     * @return map from chromosome name to the features on it. Chromosomes appear
     *         in the order they are first encountered, and features keep their
     *         relative input order within each list.
     */
    public static Map<String, List<IGVFeature>> divideByChromosome(List<IGVFeature> features) {
        Map<String, List<IGVFeature>> featureMap = new LinkedHashMap<String, List<IGVFeature>>();
        for (IGVFeature f : features) {
            String chr = f.getChr();
            List<IGVFeature> flist = featureMap.get(chr);
            if (flist == null) {
                flist = new ArrayList<IGVFeature>();
                featureMap.put(chr, flist);
            }
            flist.add(f);
        }
        return featureMap;
    }

    /**
     * Segregate a list of possibly overlapping features into a list of
     * non-overlapping lists of features.
     * <p/>
     * The input list is copied and the copy is sorted by start; the caller's list
     * is not modified. Rows are filled greedily: a feature joins the current row
     * only if its scaled start is strictly greater than the scaled end of the
     * feature most recently placed in that row, otherwise it is deferred to a
     * later row.
     * <p/>
     * (The misspelled method name is retained for compatibility with callers.)
     *
     * @param features features to segregate
     * @param scale    divisor applied to start and end positions (the quotient is
     *                 truncated to an int) before they are compared; presumably
     *                 the number of genomic positions per display unit -- confirm
     *                 with callers
     * @return list of rows, each a list of mutually non-overlapping features in
     *         ascending start order
     */
    public static List<List<IGVFeature>> segreateFeatures(List<IGVFeature> features, double scale) {

        // Holds the rows of non-overlapping features
        List<List<IGVFeature>> segmentedLists = new ArrayList<List<IGVFeature>>();

        // Work on a sorted copy so the caller's list is left untouched.
        List<IGVFeature> remaining = new ArrayList<IGVFeature>(features);
        sortFeatureList(remaining);

        // Each pass builds one row and collects everything that did not fit.
        while (!remaining.isEmpty()) {

            List<IGVFeature> row = new ArrayList<IGVFeature>();
            List<IGVFeature> deferred = new ArrayList<IGVFeature>();

            // The first remaining feature always starts the row, it can't overlap itself
            Iterator<IGVFeature> iter = remaining.iterator();
            IGVFeature lastPlaced = iter.next();
            row.add(lastPlaced);

            while (iter.hasNext()) {
                IGVFeature candidate = iter.next();
                int scaledStart = (int) (candidate.getStart() / scale);
                int scaledEnd = (int) (lastPlaced.getEnd() / scale);
                if (scaledStart > scaledEnd) {
                    row.add(candidate);
                    lastPlaced = candidate;
                } else {
                    deferred.add(candidate);
                }
            }

            // Record the finished row and start again with what is left
            segmentedLists.add(row);
            remaining = deferred;
        }
        return segmentedLists;
    }

    /**
     * Sort the feature list in place by ascending start value.
     *
     * @param features list to sort; modified in place
     */
    public static void sortFeatureList(List<? extends Feature> features) {
        Collections.sort(features, FEATURE_START_COMPARATOR);
    }

    /**
     * Null safe version of {@linkplain #combineSortedFeatureListsNoDups(java.util.Iterator, java.util.Iterator, int, int)}
     * If BOTH self and other are null, returns null. If only one is null,
     * returns the other (the same list instance, not a copy).
     *
     * @param self  features belonging to this interval, may be null
     * @param other features belonging to some other interval, may be null
     * @param start the beginning of the interval from which self was derived
     * @param end   the end of the interval from which self was derived
     * @return the combined list, one of the inputs, or null as described above
     */
    public static List combineSortedFeatureListsNoDups(List self, List other, int start, int end) {
        if (self == null && other == null) {
            return null;
        } else if (self == null) {
            return other;
        } else if (other == null) {
            return self;
        }

        return combineSortedFeatureListsNoDups(self.iterator(), other.iterator(), start, end);
    }

    /**
     * Features are sorted by start position. The interval being merged
     * will have some features on the left or right that the current
     * interval does not have. Both are sorted by start position.
     * So we first add at the beginning, and then the end,
     * only those features which don't overlap the original interval.
     * <p/>
     * Concretely: features from {@code otherIter} are taken from the front for as
     * long as their end is {@code <= start}; then every feature from
     * {@code selfIter} is added; then each not-yet-consumed feature from
     * {@code otherIter} whose start is {@code >= end} is added. Every feature from
     * {@code otherIter} is added at most once.
     * <p/>
     * NOTE: WE DO NOT USE GENERICS PROPERLY SO WE CAN REUSE THIS METHOD.
     * BE CAREFUL.
     *
     * @param selfIter  iterator of features belonging to this interval
     * @param otherIter iterator of features belonging to some other interval
     * @param start     the beginning of the interval from which selfIter was derived
     * @param end       the end of the interval from which selfIter was derived
     * @return Combined sorted list.
     * @throws ClassCastException If the elements of an iterator cannot be cast
     *                            to a Feature.
     */
    public static List combineSortedFeatureListsNoDups(Iterator selfIter, Iterator otherIter, int start, int end) {
        List<Feature> allFeatures = new ArrayList<Feature>();

        // Left flank: consume features that end at or before the interval start.
        // The first feature that fails the test has already been pulled from the
        // iterator, so remember it for the right-flank pass below.
        Feature firstUnconsumed = null;
        while (otherIter.hasNext()) {
            Feature candidate = (Feature) otherIter.next();
            if (candidate.getEnd() > start) {
                firstUnconsumed = candidate;
                break;
            }
            allFeatures.add(candidate);
        }

        // Everything from this interval
        while (selfIter.hasNext()) {
            allFeatures.add((Feature) selfIter.next());
        }

        // Right flank: of what is left, keep only features starting at or after the
        // interval end. If the left-flank loop drained otherIter there is nothing
        // left to examine, which also guarantees no feature is added twice.
        if (firstUnconsumed != null) {
            if (firstUnconsumed.getStart() >= end) {
                allFeatures.add(firstUnconsumed);
            }
            while (otherIter.hasNext()) {
                Feature candidate = (Feature) otherIter.next();
                if (candidate.getStart() >= end) {
                    allFeatures.add(candidate);
                }
            }
        }

        return allFeatures;
    }

    /**
     * Return a feature from the supplied list at the given position.
     * <p/>
     * Performs a binary search, so the list is assumed to be sorted by start. The
     * search gives up as soon as the probed range cannot be narrowed further, so
     * with overlapping features a match is not guaranteed to be found.
     *
     * @param position 0-based genomic position to which to search for feature
     * @param buffer   search region. The first probed feature whose range, expanded
     *                 by buffer on both sides (inclusive), contains position will be
     *                 accepted.
     * @param features list to search, sorted by start; may be null
     * @return a matching feature, or null if none was found or the list is null
     */
    public static Feature getFeatureAt(double position, int buffer, List<? extends Feature> features) {

        if (features == null) {
            return null;
        }

        int startIdx = 0;
        int endIdx = features.size();

        while (startIdx != endIdx) {
            int idx = (startIdx + endIdx) >>> 1;
            Feature feature = features.get(idx);

            if (position >= feature.getStart() - buffer) {
                if (position <= feature.getEnd() + buffer) {
                    return feature;
                }
                if (idx == startIdx) {
                    // Range can't shrink any further, nothing to the right to try
                    return null;
                }
                startIdx = idx;
            } else {
                endIdx = idx;
            }
        }

        return null;
    }

    /**
     * Get the first feature in the list whose start is strictly greater than the
     * given position.
     *
     * @param position query position
     * @param features list to search, sorted by start; may be null
     * @return the first feature starting after position, or null if there is none
     *         or the list is null or empty
     */
    public static Feature getFeatureAfter(double position, List<? extends Feature> features) {

        if (features == null || features.isEmpty()
                || features.get(features.size() - 1).getStart() <= position) {
            return null;
        }

        int startIdx = 0;
        int endIdx = features.size();

        // Narrow the candidate range to fewer than 10 entries
        while (startIdx != endIdx) {
            int idx = (startIdx + endIdx) >>> 1;
            double distance = features.get(idx).getStart() - position;
            if (distance <= 0) {
                startIdx = idx;
            } else {
                endIdx = idx;
            }
            if (endIdx - startIdx < 10) {
                break;
            }
        }

        // Now scan forward for the feature
        for (int idx = startIdx; idx < features.size(); idx++) {
            Feature candidate = features.get(idx);
            if (candidate.getStart() > position) {
                return candidate;
            }
        }

        return null;

    }

    /**
     * Get the last feature in the list whose start is strictly less than the given
     * position.
     *
     * @param position query position
     * @param features list to search, sorted by start; may be null
     * @return the last feature starting before position, or null if there is none
     */
    public static Feature getFeatureBefore(double position, List<? extends Feature> features) {

        // getIndexBefore may return an index whose feature starts at or after
        // position (see its boundary cases), so walk left until one qualifies.
        for (int index = getIndexBefore(position, features); index >= 0; index--) {
            Feature f = features.get(index);
            if (f.getStart() < position) {
                return f;
            }
        }
        return null;

    }

    /**
     * Return the feature closest to the given position.
     * <p/>
     * A feature whose start equals {@code (int) position} is returned directly.
     * Otherwise the distance from position to the end of the nearest feature
     * starting before it is compared with the distance to the start of the nearest
     * feature starting after it; on a tie the feature after position wins.
     *
     * @param position query position
     * @param features list to search, sorted by start; may be null
     * @return the closest feature, or null if the list is null or empty
     */
    public static Feature getFeatureClosest(double position, List<? extends org.broad.tribble.Feature> features) {
        // look for exact match at position:
        Feature exact = getFeatureAt(position, features);
        if (exact != null) {
            return exact;
        }
        // otherwise look for features on either side and return the closest:
        Feature before = getFeatureBefore(position, features);
        Feature after = getFeatureAfter(position, features);

        double distBefore = before == null ? Double.MAX_VALUE : Math.abs(position - before.getEnd());
        double distAfter = after == null ? Double.MAX_VALUE : Math.abs(after.getStart() - position);

        return (distBefore < distAfter ? before : after);

    }

    /**
     * Return a feature whose start equals the supplied position, truncated to an
     * int. This is a binary search on start only; a feature that merely spans the
     * position is not matched.
     *
     * @param position Query position.
     * @param features List of features, sorted by start; may be null.
     * @return A feature whose start equals {@code (int) position}, or null.
     */
    private static Feature getFeatureAt(double position, List<? extends Feature> features) {
        if (features == null) {
            return null;
        }
        int strt = (int) position;
        // Only the start of the key takes part in the comparison
        Feature key = new BasicFeature("", strt, strt + 1);

        int r = Collections.binarySearch(features, key, FEATURE_START_COMPARATOR);
        return r >= 0 ? features.get(r) : null;
    }

    /**
     * Return the index of the last feature in the list with a start < the given
     * position, with two boundary cases that callers must allow for:
     * <ul>
     * <li>if the last feature's start is <= position, the last index is returned
     * (so that feature's start may equal position);</li>
     * <li>otherwise, if the first feature's start is >= position, 0 is returned
     * even though no feature starts before position.</li>
     * </ul>
     *
     * @param position query position
     * @param features list to search, sorted by start; may be null
     * @return the index as described above, or -1 if the list is null or empty
     */
    public static int getIndexBefore(double position, List<? extends Feature> features) {

        if (features == null || features.size() == 0) {
            return -1;
        }
        if (features.get(features.size() - 1).getStart() <= position) {
            return features.size() - 1;
        }
        if (features.get(0).getStart() >= position) {
            return 0;
        }

        int startIdx = 0;
        int endIdx = features.size() - 1;

        // Narrow the candidate range to fewer than 10 entries
        while (startIdx != endIdx) {
            int idx = (startIdx + endIdx) >>> 1;
            double distance = features.get(idx).getStart() - position;
            if (distance <= 0) {
                startIdx = idx;
            } else {
                endIdx = idx;
            }
            if (endIdx - startIdx < 10) {
                break;
            }
        }

        if (features.get(endIdx).getStart() >= position) {
            // Boundary lies at or left of endIdx, scan backwards for it
            for (int idx = endIdx; idx >= 0; idx--) {
                if (features.get(idx).getStart() < position) {
                    return idx;
                }
            }
        } else {
            // Boundary lies right of endIdx, scan forwards for it
            for (int idx = endIdx + 1; idx < features.size(); idx++) {
                if (features.get(idx).getStart() >= position) {
                    return idx - 1;
                }
            }
        }
        return -1;
    }

    /**
     * Return all features from the supplied list that span the given position.
     * <p/>
     * Each feature's range is widened by {@code (int) (minWidth / 2)} on both sides
     * before the inclusive containment test. Scanning begins at the index returned
     * by {@link #getIndexBefore(double, List)} for {@code position - maxLength}
     * (clamped to 0) and stops at the first feature whose widened start lies
     * beyond position.
     *
     * @param position  query position
     * @param maxLength how far before position the scan begins; presumably the
     *                  length of the longest feature in the list -- confirm with
     *                  callers
     * @param minWidth  minimum effective feature width; half of it (truncated to an
     *                  int) is added to each side of every feature
     * @param features  list to search, sorted by start; may be null
     * @return the matching features in list order, or null (not an empty list) if
     *         there are none or the list is null
     */
    public static List<Feature> getAllFeaturesAt(double position, double maxLength, double minWidth,
            List<? extends org.broad.tribble.Feature> features) {

        // getIndexBefore tolerates a null list; behave consistently here
        if (features == null) {
            return null;
        }

        List<Feature> returnList = null;
        int halfMinWidth = (int) (minWidth / 2);

        double adjustedPosition = Math.max(0, position - maxLength);
        int startIdx = Math.max(0, getIndexBefore(adjustedPosition, features));
        for (int idx = startIdx; idx < features.size(); idx++) {
            Feature feature = features.get(idx);
            int start = feature.getStart() - halfMinWidth;

            // Sorted by start, so nothing further along can contain position
            if (start > position) {
                break;
            }

            int end = feature.getEnd() + halfMinWidth;

            if (position >= start && position <= end) {
                if (returnList == null) {
                    returnList = new ArrayList<Feature>();
                }
                returnList.add(feature);
            }
        }

        return returnList;
    }

    /**
     * Three-way comparison of two ints that cannot overflow, unlike
     * {@code a - b}.
     */
    private static int compareInts(int a, int b) {
        return a < b ? -1 : (a == b ? 0 : 1);
    }

    /**
     * Treats {@code o2} as equal to {@code o1} when {@code o2} lies entirely within
     * {@code o1}'s start/end range; otherwise orders by start. The relation is not
     * symmetric, so this is only suitable for keyed lookups, not for sorting.
     * Not currently referenced within this class.
     */
    private static final Comparator<Feature> FEATURE_CONTAINS_COMPARATOR = new Comparator<Feature>() {
        @Override
        public int compare(Feature o1, Feature o2) {
            int genomeStart1 = o1.getStart();
            int genomeStart2 = o2.getStart();
            if (genomeStart2 >= genomeStart1 && o2.getEnd() <= o1.getEnd()) {
                return 0;
            }
            return compareInts(genomeStart1, genomeStart2);
        }
    };

    /**
     * Orders features by ascending start position. Features with equal starts
     * compare as equal regardless of their ends.
     */
    public static final Comparator<Feature> FEATURE_START_COMPARATOR = new Comparator<Feature>() {
        @Override
        public int compare(Feature o1, Feature o2) {
            return compareInts(o1.getStart(), o2.getStart());
        }
    };
}