// Java tutorial
/*
 * Copyright (C) 2009-2010 Institute for Computational Biomedicine,
 *                         Weill Medical College of Cornell University
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package edu.cornell.med.icb.goby.modes;

import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import edu.cornell.med.icb.goby.algorithmic.data.Annotation;
import edu.cornell.med.icb.goby.algorithmic.data.ranges.Range;
import edu.cornell.med.icb.goby.algorithmic.data.ranges.Ranges;
import edu.cornell.med.icb.goby.alignments.*;
import edu.cornell.med.icb.identifier.DoubleIndexedIdentifier;
import it.unimi.dsi.fastutil.objects.Object2IntLinkedOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
import it.unimi.dsi.fastutil.objects.ObjectList;
import org.apache.commons.io.IOUtils;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Map;

/**
 * Suggests how to slice an alignment by position to yield roughly equally sized slices.
 * The suggested slices are written as a tab delimited table, one slice per line, either
 * to the output file or to standard output when no output file is given.
 *
 * @author Fabien Campagne
 */
public class SuggestPositionSlicesMode extends AbstractGobyMode {
    /**
     * The mode name.
     */
    private static final String MODE_NAME = "suggest-position-slices";

    /**
     * The mode description help text.
     */
    private static final String MODE_DESCRIPTION =
            "Suggest how to slice an alignment by position to yield roughly equally sized slices. ";

    /**
     * The output file. When null, results are written to standard output.
     */
    private String outputFilename;

    /**
     * The basenames of the compact alignments to read.
     */
    private String[] basenames;

    /**
     * Value of the "modulo" command line option, handed to the reader when
     * the list of indexed locations is requested.
     */
    private int modulo;

    /**
     * Value of the "number-of-slices" command line option: how many slices to suggest.
     */
    private int numberOfSlices;

    /**
     * Not read or written anywhere in this class.
     */
    private int bufferLength;

    /**
     * Optional: file name for an annotation file.
     */
    private String annotationFilename;

    @Override
    public String getModeName() {
        return MODE_NAME;
    }

    @Override
    public String getModeDescription() {
        return MODE_DESCRIPTION;
    }

    /**
     * Configure.
     *
     * @param args command line arguments
     * @return this object for chaining
     * @throws java.io.IOException error parsing
     * @throws com.martiansoftware.jsap.JSAPException
     *                             error parsing
     */
    @Override
    public AbstractCommandLineMode configure(final String[] args)
            throws IOException, JSAPException {
        final JSAPResult jsapResult = parseJsapArguments(args);

        final String[] inputFiles = jsapResult.getStringArray("input");
        basenames = AlignmentReaderImpl.getBasenames(inputFiles);
        annotationFilename = jsapResult.getString("annotations");
        outputFilename = jsapResult.getString("output");
        modulo = jsapResult.getInt("modulo");
        numberOfSlices = jsapResult.getInt("number-of-slices");
        return this;
    }

    /**
     * Suggests slices to process a large alignment file in parallel. Writes a header line
     * followed by one line per slice, giving the start and end (target, position) of the slice.
     *
     * @throws java.io.IOException   error reading / writing
     * @throws IllegalStateException when the alignment yields no indexed location, in which
     *                               case no slice can be suggested
     */
    @Override
    public void execute() throws IOException {
        PrintStream stream = null;
        try {
            stream = outputFilename == null ? System.out
                    : new PrintStream(new FileOutputStream(outputFilename));

            // NOTE(review): the reader is never closed here. If ConcatSortedAlignmentReader
            // exposes a close() method, it should be called once the slices are written.
            final ConcatSortedAlignmentReader input = new ConcatSortedAlignmentReader(basenames);
            input.readHeader();
            final DoubleIndexedIdentifier ids =
                    new DoubleIndexedIdentifier(input.getTargetIdentifiers());
            final ObjectList<ReferenceLocation> locations = input.getLocations(modulo);

            Ranges ranges = null;
            if (annotationFilename != null) {
                ranges = convertAnnotationsToRanges(annotationFilename, ids, input);
            }

            if (locations.isEmpty()) {
                // Fail with an explicit message rather than an IndexOutOfBoundsException below.
                throw new IllegalStateException(
                        "No indexed location found in the alignment, cannot suggest slices.");
            }

            // Never suggest more slices than there are candidate locations. A local is used
            // so that the configured value is not overwritten.
            final int sliceCount = Math.min(numberOfSlices, locations.size());

            final ReferenceLocation[] breakpoints = new ReferenceLocation[sliceCount + 1];
            // The first breakpoint is the first location; it is taken out of the candidate list.
            breakpoints[0] = locations.get(0);
            locations.remove(0);

            stream.println(
                    "targetIdStart\t%positionStart\tstart:(ref,pos)\ttargetIdEnd\t%positionEnd\tend:(ref,pos)");

            // Pick the intermediate breakpoints at regular strides through the remaining locations.
            for (int i = 0; i < sliceCount - 1; i++) {
                breakpoints[i + 1] = locations.get(locations.size() / (sliceCount - 1) * i);
            }

            // largest position in the last reference sequence:
            final int lastTargetIndex = ids.size() - 1;
            breakpoints[breakpoints.length - 1] =
                    new ReferenceLocation(lastTargetIndex, input.getTargetLength(lastTargetIndex));

            if (ranges != null) {
                adjustBreakpointsWithAnnotations(breakpoints, ranges);
            }

            for (int i = 0; i < sliceCount; i++) {
                final ReferenceLocation start = breakpoints[i];
                final ReferenceLocation end = breakpoints[i + 1];
                // Format exactly once: feeding already formatted text back to printf would
                // interpret any '%' found in a target identifier as a format specifier.
                stream.printf("%s\t%d\t%s,%d\t%s\t%d\t%s,%d%n",
                        ids.getId(start.targetIndex), start.position,
                        ids.getId(start.targetIndex), start.position,
                        ids.getId(end.targetIndex), end.position,
                        ids.getId(end.targetIndex), end.position);
            }
        } finally {
            // Never close standard output.
            if (stream != System.out) {
                IOUtils.closeQuietly(stream);
            }
        }
    }

    /**
     * Moves each breakpoint to the middle of the non-overlapping range that
     * {@code ranges} reports for the breakpoint's current (target, position).
     * Breakpoints are modified in place.
     *
     * @param breakpoints the breakpoints to adjust
     * @param ranges      the ranges built from the annotations
     */
    private void adjustBreakpointsWithAnnotations(final ReferenceLocation[] breakpoints,
                                                  final Ranges ranges) {
        ranges.order();

        for (final ReferenceLocation breakpoint : breakpoints) {
            final Range nonOverlapping =
                    ranges.findNonOverlappingRange(breakpoint.targetIndex, breakpoint.position);
            // change the breakpoint to a position in the middle of the non-overlapping segment.
            // The sum is computed as a long so that it cannot overflow when a bound is close
            // to Integer.MAX_VALUE or Integer.MIN_VALUE.
            breakpoint.position =
                    (int) (((long) nonOverlapping.min + (long) nonOverlapping.max) / 2L);
        }
    }

    /**
     * Reads the annotation file and builds one range per annotation id. Each range spans from
     * the smallest start to the largest end seen for that id, and is registered under the
     * index of the chromosome of the annotations that carry the id.
     *
     * @param annotationFilename name of the annotation file, may be null
     * @param ids                identifiers of the alignment targets, used to map a
     *                           chromosome name to a reference index
     * @param input              the alignment reader (not used by this method)
     * @return the ordered ranges, or null when annotationFilename is null
     * @throws IOException error reading the annotation file
     */
    private Ranges convertAnnotationsToRanges(final String annotationFilename,
                                              final DoubleIndexedIdentifier ids,
                                              final ConcatSortedAlignmentReader input)
            throws IOException {
        if (annotationFilename == null) {
            return null;
        }

        final Ranges ranges = new Ranges();
        final Object2ObjectMap<String, ObjectList<Annotation>> annotations =
                CompactAlignmentToAnnotationCountsMode.readAnnotations(annotationFilename);

        final Object2IntMap<String> starts = new Object2IntLinkedOpenHashMap<String>();
        final Object2IntMap<String> ends = new Object2IntLinkedOpenHashMap<String>();
        final Object2IntMap<String> refIndices = new Object2IntLinkedOpenHashMap<String>();
        // Defaults make the first min/max comparison for an id pick the annotation's own value.
        starts.defaultReturnValue(Integer.MAX_VALUE);
        ends.defaultReturnValue(Integer.MIN_VALUE);

        for (final ObjectList<Annotation> value : annotations.values()) {
            for (final Annotation ann : value) {
                final String chromosome = ann.getChromosome();
                assert chromosome != null : " annotation must contain non null chromosome.";
                final String id = ann.getId();

                starts.put(id, Math.min(starts.getInt(id), ann.getStart()));
                ends.put(id, Math.max(ends.getInt(id), ann.getEnd()));
                // Remember on which reference the annotation lies. Without this, every range
                // would be registered under reference index 0.
                // NOTE(review): relies on DoubleIndexedIdentifier.getIndex(String) - confirm.
                refIndices.put(id, ids.getIndex(chromosome));
            }
        }

        for (final String id : starts.keySet()) {
            final int referenceIndex = refIndices.getInt(id);
            if (referenceIndex < 0) {
                // NOTE(review): presumably a negative index means the chromosome is not a
                // target of this alignment - confirm. Such annotations cannot constrain
                // any breakpoint, so they are skipped.
                continue;
            }
            final Range range = new Range();
            range.min = starts.getInt(id);
            range.max = ends.getInt(id);
            ranges.add(range, referenceIndex);
        }
        ranges.order();
        return ranges;
    }

    /**
     * Main method.
     *
     * @param args command line args.
     * @throws com.martiansoftware.jsap.JSAPException
     *                             error parsing
     * @throws java.io.IOException error parsing or executing.
     */
    public static void main(final String[] args) throws JSAPException, IOException {
        new SuggestPositionSlicesMode().configure(args).execute();
    }
}