Java tutorial
/* * Copyright (c) 2007-2012 The Broad Institute, Inc. * SOFTWARE COPYRIGHT NOTICE * This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved. * * This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality. * * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. */ package org.broad.igv.track; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.broad.igv.Globals; import org.broad.igv.feature.LocusScore; import org.broad.igv.feature.tribble.IGVBEDCodec; import org.broad.igv.util.RuntimeUtils; import org.broad.tribble.Feature; import java.io.*; import java.util.*; /** * A feature source which combines results from other feature sources. * Currently uses bedtools to combine results * User: jacob * Date: 2012/05/01 */ public class CombinedFeatureSource implements FeatureSource { private static Logger log = Logger.getLogger(CombinedFeatureSource.class); private FeatureSource[] sources; private Operation operation; /** * Checks the global bedtools path, to see if bedtools * is actually there. Check is 2-fold: * First, we check if path exists. * If so, we run version command * * @return */ public static boolean checkBEDToolsPathValid() { String path = Globals.BEDtoolsPath; File bedtoolsFile = new File(path); boolean pathValid = bedtoolsFile.isFile(); if (pathValid && !bedtoolsFile.canExecute()) { log.debug(path + " exists but is not executable. "); return false; } String cmd = path + " --version"; String resp; try { resp = RuntimeUtils.executeShellCommand(cmd, null, null); } catch (IOException e) { log.error(e); return false; } String line0 = resp.split("\n")[0].toLowerCase(); pathValid &= line0.contains("bedtools v"); pathValid &= !line0.contains("command not found"); return pathValid; } public CombinedFeatureSource(Collection<Track> tracks, Operation operation) { List<FeatureSource> sources = new ArrayList<FeatureSource>(tracks.size()); for (Track t : tracks) { if (t instanceof FeatureTrack) { sources.add(((FeatureTrack) t).source); } } init(sources.toArray(new FeatureSource[0]), operation); } public CombinedFeatureSource(FeatureSource[] sources, Operation operation) { init(sources, operation); } /** * If known, it is recommended that source[0] be the larger of the two. sources[1] will * be loaded into memory by BEDTools. * * @param sources * @param operation How the two sources will be combined */ private void init(FeatureSource[] sources, Operation operation) { this.sources = sources; this.operation = operation; if (sources.length != 2 && operation != Operation.MULTIINTER) { throw new IllegalArgumentException("sources must be length 2 for operation " + operation); } } /** * Stream will be closed after data written * * @param features * @param outputStream * @return */ private int writeFeaturesToStream(Iterator<Feature> features, OutputStream outputStream) { PrintWriter writer = new PrintWriter(new OutputStreamWriter(outputStream)); int allNumCols = -1; if (features != null) { IGVBEDCodec codec = new IGVBEDCodec(); while (features.hasNext()) { String data = codec.encode(features.next()); writer.println(data); //We require consistency of output int tmpNumCols = data.split("\t").length; if (allNumCols < 0) { allNumCols = tmpNumCols; } else { assert tmpNumCols == allNumCols; } } } writer.flush(); writer.close(); return allNumCols; } /** * Write out data from feature sources within the specified interval * to temporary files. * * @param chr * @param start * @param end * @return LinkedHashMap from TempFileName -> number of columns in data file * A LinkedHashMap has a predictable iteration order, which will be the same as the * insertion order, which will be the order of sources * @throws IOException */ private LinkedHashMap<String, Integer> createTempFiles(String chr, int start, int end) throws IOException { LinkedHashMap<String, Integer> tempFiles = new LinkedHashMap<String, Integer>(sources.length); for (FeatureSource source : sources) { Iterator<Feature> iter = source.getFeatures(chr, start, end); File outFile = File.createTempFile("features", ".bed", null); outFile.deleteOnExit(); int numCols = writeFeaturesToStream(iter, new FileOutputStream(outFile)); tempFiles.put(outFile.getAbsolutePath(), numCols); } return tempFiles; } /** * Perform the actual combination operation between the constituent data * sources. This implementation re-runs the operation each call. * * @param chr * @param start * @param end * @return * @throws IOException */ @Override public Iterator<Feature> getFeatures(String chr, int start, int end) throws IOException { String cmd = Globals.BEDtoolsPath + " " + this.operation.getCmd(); LinkedHashMap<String, Integer> tempFiles = createTempFiles(chr, start, end); String[] fiNames = tempFiles.keySet().toArray(new String[0]); if (operation == Operation.MULTIINTER) { assert tempFiles.size() >= 2; cmd += " -i " + StringUtils.join(tempFiles.keySet(), " "); } else { assert tempFiles.size() == 2; cmd += " -a " + fiNames[0] + " -b " + fiNames[1]; } //Start bedtools process Process pr = RuntimeUtils.startExternalProcess(cmd, null, null); //Read back in the data which bedtools output BufferedReader in = new BufferedReader(new InputStreamReader(pr.getInputStream())); BufferedReader err = new BufferedReader(new InputStreamReader(pr.getErrorStream())); List<Feature> featuresList = new ArrayList<Feature>(); IGVBEDCodec codec = new IGVBEDCodec(); String line; Feature feat; int numCols0 = tempFiles.get(fiNames[0]); int numCols1 = tempFiles.get(fiNames[1]); while ((line = in.readLine()) != null) { System.out.println(line); String[] tokens = line.split("\t"); if (operation.getCmd().contains("-split")) { //When we split, the returned feature still has the exons //We don't want to plot them all a zillion times tokens = Arrays.copyOfRange(tokens, 0, Math.min(6, tokens.length)); } if (operation == Operation.WINDOW || operation == Operation.CLOSEST) { String[] closest = Arrays.copyOfRange(tokens, numCols0, numCols0 + numCols1); //If not found, bedtools returns -1 for positions if (closest[1].trim().equalsIgnoreCase("-1")) { continue; } feat = codec.decode(closest); } else if (operation == Operation.MULTIINTER) { //We only look at regions common to ALL inputs //Columns: chr \t start \t \end \t # of files which contained this feature \t comma-separated list files +many more int numRegions = Integer.parseInt(tokens[3]); if (numRegions < sources.length) { continue; } String[] intersection = Arrays.copyOf(tokens, 3); feat = codec.decode(intersection); } else { feat = codec.decode(tokens); } featuresList.add(feat); } in.close(); while ((line = err.readLine()) != null) { log.error(line); } err.close(); return featuresList.iterator(); } @Override public List<LocusScore> getCoverageScores(String chr, int start, int end, int zoom) { return null; } /** * If this track has not had it's feature window size set, * we use the minimum of the sources * * @return */ @Override public int getFeatureWindowSize() { int featureWindowSize = Integer.MAX_VALUE; for (FeatureSource source : sources) { featureWindowSize = Math.min(featureWindowSize, source.getFeatureWindowSize()); } return featureWindowSize; } @Override public void setFeatureWindowSize(int size) { //no-op } public enum Operation { //We use these bed flags to ensure output will be in bed format, even //if input is bam //TODO Include -wo, -wb options INTERSECT("intersect -bed -split"), SUBTRACT("subtract"), //Identify the "closest" feature in file B for each feature in file A CLOSEST("closest"), //TODO include -d option WINDOW("window -bed"), COVERAGE("coverage -split"), MULTIINTER("multiinter"); private String cmd; private Operation(String cmd) { this.cmd = cmd; } public String getCmd() { return cmd; } } }