List of usage examples for java.util.TreeMap.lastEntry()
public Map.Entry<K, V> lastEntry()
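Returns a key-value mapping associated with the greatest key in this map, or null if the map is empty. The entry is returned without being removed (use pollLastEntry() to remove it).

Before the real-world examples, here is a minimal, self-contained sketch (the class name LastEntryDemo and the sample data are made up for illustration) showing the call and the null return on an empty map, a case several of the examples below guard against with isEmpty():

import java.util.Map;
import java.util.TreeMap;

public class LastEntryDemo {
    public static void main(String[] args) {
        TreeMap<String, Integer> scores = new TreeMap<>();
        scores.put("alice", 42);
        scores.put("bob", 17);
        scores.put("carol", 99);

        // lastEntry() returns the mapping with the greatest key ("carol" here),
        // without removing it from the map.
        Map.Entry<String, Integer> last = scores.lastEntry();
        System.out.println(last.getKey() + " -> " + last.getValue()); // carol -> 99

        // On an empty map, lastEntry() returns null rather than throwing,
        // so guard the call when the map may be empty.
        TreeMap<String, Integer> empty = new TreeMap<>();
        Map.Entry<String, Integer> none = empty.lastEntry();
        System.out.println(none); // null
    }
}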
From source file:org.apache.accumulo.tserver.TabletServer.java
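In the tablet split path, Tablet.split() returns a two-entry TreeMap; firstEntry() and lastEntry() yield the data for the low and high child tablets, which are then opened and registered in place of the parent.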
private TreeMap<KeyExtent, TabletData> splitTablet(Tablet tablet, byte[] splitPoint) throws IOException {
    long t1 = System.currentTimeMillis();

    TreeMap<KeyExtent, TabletData> tabletInfo = tablet.split(splitPoint);
    if (tabletInfo == null) {
        return null;
    }

    log.info("Starting split: " + tablet.getExtent());
    statsKeeper.incrementStatusSplit();
    long start = System.currentTimeMillis();

    Tablet[] newTablets = new Tablet[2];

    Entry<KeyExtent, TabletData> first = tabletInfo.firstEntry();
    TabletResourceManager newTrm0 = resourceManager.createTabletResourceManager(first.getKey(),
        getTableConfiguration(first.getKey()));
    newTablets[0] = new Tablet(TabletServer.this, first.getKey(), newTrm0, first.getValue());

    Entry<KeyExtent, TabletData> last = tabletInfo.lastEntry();
    TabletResourceManager newTrm1 = resourceManager.createTabletResourceManager(last.getKey(),
        getTableConfiguration(last.getKey()));
    newTablets[1] = new Tablet(TabletServer.this, last.getKey(), newTrm1, last.getValue());

    // roll tablet stats over into tablet server's statsKeeper object as
    // historical data
    statsKeeper.saveMajorMinorTimes(tablet.getTabletStats());

    // lose the reference to the old tablet and open two new ones
    synchronized (onlineTablets) {
        onlineTablets.remove(tablet.getExtent());
        onlineTablets.put(newTablets[0].getExtent(), newTablets[0]);
        onlineTablets.put(newTablets[1].getExtent(), newTablets[1]);
    }

    // tell the master
    enqueueMasterMessage(new SplitReportMessage(tablet.getExtent(), newTablets[0].getExtent(),
        new Text("/" + newTablets[0].getLocation().getName()), newTablets[1].getExtent(),
        new Text("/" + newTablets[1].getLocation().getName())));

    statsKeeper.updateTime(Operation.SPLIT, start, 0, false);

    long t2 = System.currentTimeMillis();
    log.info("Tablet split: " + tablet.getExtent() + " size0 " + newTablets[0].estimateTabletSize()
        + " size1 " + newTablets[1].estimateTabletSize() + " time " + (t2 - t1) + "ms");

    return tabletInfo;
}
From source file:org.apache.accumulo.server.tabletserver.ScanRunState.java
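An older variant of the same split logic: firstEntry() and lastEntry() supply the SplitInfo (directory, datafiles, time, flush/compact IDs) used to construct the two child tablets.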
private TreeMap<KeyExtent, SplitInfo> splitTablet(Tablet tablet, byte[] splitPoint) throws IOException {
    long t1 = System.currentTimeMillis();

    TreeMap<KeyExtent, SplitInfo> tabletInfo = tablet.split(splitPoint);
    if (tabletInfo == null) {
        return null;
    }

    log.info("Starting split: " + tablet.getExtent());
    statsKeeper.incrementStatusSplit();
    long start = System.currentTimeMillis();

    Tablet[] newTablets = new Tablet[2];

    Entry<KeyExtent, SplitInfo> first = tabletInfo.firstEntry();
    newTablets[0] = new Tablet(TabletServer.this, new Text(first.getValue().dir), first.getKey(),
        resourceManager.createTabletResourceManager(), first.getValue().datafiles, first.getValue().time,
        first.getValue().initFlushID, first.getValue().initCompactID);

    Entry<KeyExtent, SplitInfo> last = tabletInfo.lastEntry();
    newTablets[1] = new Tablet(TabletServer.this, new Text(last.getValue().dir), last.getKey(),
        resourceManager.createTabletResourceManager(), last.getValue().datafiles, last.getValue().time,
        last.getValue().initFlushID, last.getValue().initCompactID);

    // roll tablet stats over into tablet server's statsKeeper object as
    // historical data
    statsKeeper.saveMinorTimes(tablet.timer);
    statsKeeper.saveMajorTimes(tablet.timer);

    // lose the reference to the old tablet and open two new ones
    synchronized (onlineTablets) {
        onlineTablets.remove(tablet.getExtent());
        onlineTablets.put(newTablets[0].getExtent(), newTablets[0]);
        onlineTablets.put(newTablets[1].getExtent(), newTablets[1]);
    }

    // tell the master
    enqueueMasterMessage(new SplitReportMessage(tablet.getExtent(), newTablets[0].getExtent(),
        new Text("/" + newTablets[0].getLocation().getName()), newTablets[1].getExtent(),
        new Text("/" + newTablets[1].getLocation().getName())));

    statsKeeper.updateTime(Operation.SPLIT, start, 0, false);

    long t2 = System.currentTimeMillis();
    log.info("Tablet split: " + tablet.getExtent() + " size0 " + newTablets[0].estimateTabletSize()
        + " size1 " + newTablets[1].estimateTabletSize() + " time " + (t2 - t1) + "ms");

    return tabletInfo;
}
From source file:org.apache.nutch.segment.SegmentMerger.java
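The linked map is keyed by segment name, so lastEntry() (and later lastKey()) select the LINKED CrawlDatum list coming from the lexicographically latest, i.e. newest, segment.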
/** * NOTE: in selecting the latest version we rely exclusively on the segment * name (not all segment data contain time information). Therefore it is extremely * important that segments be named in an increasing lexicographic order as * their creation time increases.//from ww w . j av a2 s. co m */ public void reduce(Text key, Iterator<MetaWrapper> values, OutputCollector<Text, MetaWrapper> output, Reporter reporter) throws IOException { CrawlDatum lastG = null; CrawlDatum lastF = null; CrawlDatum lastSig = null; Content lastC = null; ParseData lastPD = null; ParseText lastPT = null; String lastGname = null; String lastFname = null; String lastSigname = null; String lastCname = null; String lastPDname = null; String lastPTname = null; TreeMap<String, ArrayList<CrawlDatum>> linked = new TreeMap<String, ArrayList<CrawlDatum>>(); while (values.hasNext()) { MetaWrapper wrapper = values.next(); Object o = wrapper.get(); String spString = wrapper.getMeta(SEGMENT_PART_KEY); if (spString == null) { throw new IOException("Null segment part, key=" + key); } SegmentPart sp = SegmentPart.parse(spString); if (o instanceof CrawlDatum) { CrawlDatum val = (CrawlDatum) o; // check which output dir it belongs to if (sp.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) { if (lastG == null) { lastG = val; lastGname = sp.segmentName; } else { // take newer if (lastGname.compareTo(sp.segmentName) < 0) { lastG = val; lastGname = sp.segmentName; } } } else if (sp.partName.equals(CrawlDatum.FETCH_DIR_NAME)) { if (lastF == null) { lastF = val; lastFname = sp.segmentName; } else { // take newer if (lastFname.compareTo(sp.segmentName) < 0) { lastF = val; lastFname = sp.segmentName; } } } else if (sp.partName.equals(CrawlDatum.PARSE_DIR_NAME)) { if (val.getStatus() == CrawlDatum.STATUS_SIGNATURE) { if (lastSig == null) { lastSig = val; lastSigname = sp.segmentName; } else { // take newer if (lastSigname.compareTo(sp.segmentName) < 0) { lastSig = val; lastSigname = sp.segmentName; } } continue; } // collect all LINKED values from the latest segment ArrayList<CrawlDatum> segLinked = linked.get(sp.segmentName); if (segLinked == null) { segLinked = new ArrayList<CrawlDatum>(); linked.put(sp.segmentName, segLinked); } segLinked.add(val); } else { throw new IOException("Cannot determine segment part: " + sp.partName); } } else if (o instanceof Content) { if (lastC == null) { lastC = (Content) o; lastCname = sp.segmentName; } else { if (lastCname.compareTo(sp.segmentName) < 0) { lastC = (Content) o; lastCname = sp.segmentName; } } } else if (o instanceof ParseData) { if (lastPD == null) { lastPD = (ParseData) o; lastPDname = sp.segmentName; } else { if (lastPDname.compareTo(sp.segmentName) < 0) { lastPD = (ParseData) o; lastPDname = sp.segmentName; } } } else if (o instanceof ParseText) { if (lastPT == null) { lastPT = (ParseText) o; lastPTname = sp.segmentName; } else { if (lastPTname.compareTo(sp.segmentName) < 0) { lastPT = (ParseText) o; lastPTname = sp.segmentName; } } } } // perform filtering based on full merge record if (mergeFilters != null && !mergeFilters.filter(key, lastG, lastF, lastSig, lastC, lastPD, lastPT, linked.isEmpty() ? 
null : linked.lastEntry().getValue())) { return; } curCount++; String sliceName = null; MetaWrapper wrapper = new MetaWrapper(); if (sliceSize > 0) { sliceName = String.valueOf(curCount / sliceSize); wrapper.setMeta(SEGMENT_SLICE_KEY, sliceName); } SegmentPart sp = new SegmentPart(); // now output the latest values if (lastG != null) { wrapper.set(lastG); sp.partName = CrawlDatum.GENERATE_DIR_NAME; sp.segmentName = lastGname; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); output.collect(key, wrapper); } if (lastF != null) { wrapper.set(lastF); sp.partName = CrawlDatum.FETCH_DIR_NAME; sp.segmentName = lastFname; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); output.collect(key, wrapper); } if (lastSig != null) { wrapper.set(lastSig); sp.partName = CrawlDatum.PARSE_DIR_NAME; sp.segmentName = lastSigname; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); output.collect(key, wrapper); } if (lastC != null) { wrapper.set(lastC); sp.partName = Content.DIR_NAME; sp.segmentName = lastCname; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); output.collect(key, wrapper); } if (lastPD != null) { wrapper.set(lastPD); sp.partName = ParseData.DIR_NAME; sp.segmentName = lastPDname; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); output.collect(key, wrapper); } if (lastPT != null) { wrapper.set(lastPT); sp.partName = ParseText.DIR_NAME; sp.segmentName = lastPTname; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); output.collect(key, wrapper); } if (linked.size() > 0) { String name = linked.lastKey(); sp.partName = CrawlDatum.PARSE_DIR_NAME; sp.segmentName = name; wrapper.setMeta(SEGMENT_PART_KEY, sp.toString()); ArrayList<CrawlDatum> segLinked = linked.get(name); for (int i = 0; i < segLinked.size(); i++) { CrawlDatum link = segLinked.get(i); wrapper.set(link); output.collect(key, wrapper); } } }
From source file:org.apache.hadoop.hbase.regionserver.StripeStoreFileManager.java
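candidateStripes is keyed by stripe end row; firstEntry() and lastEntry() identify the lowest and highest stripes when fixing up edge-stripe metadata for files that do not cover the full key space.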
/** * Loads initial store files that were picked up from some physical location pertaining to * this store (presumably). Unlike adding files after compaction, assumes empty initial * sets, and is forgiving with regard to stripe constraints - at worst, many/all files will * go to level 0.//ww w . j a va 2s .c om * @param storeFiles Store files to add. */ private void loadUnclassifiedStoreFiles(List<StoreFile> storeFiles) { LOG.debug("Attempting to load " + storeFiles.size() + " store files."); TreeMap<byte[], ArrayList<StoreFile>> candidateStripes = new TreeMap<byte[], ArrayList<StoreFile>>( MAP_COMPARATOR); ArrayList<StoreFile> level0Files = new ArrayList<StoreFile>(); // Separate the files into tentative stripes; then validate. Currently, we rely on metadata. // If needed, we could dynamically determine the stripes in future. for (StoreFile sf : storeFiles) { byte[] startRow = startOf(sf), endRow = endOf(sf); // Validate the range and put the files into place. if (isInvalid(startRow) || isInvalid(endRow)) { insertFileIntoStripe(level0Files, sf); // No metadata - goes to L0. ensureLevel0Metadata(sf); } else if (!isOpen(startRow) && !isOpen(endRow) && nonOpenRowCompare(startRow, endRow) >= 0) { LOG.error("Unexpected metadata - start row [" + Bytes.toString(startRow) + "], end row [" + Bytes.toString(endRow) + "] in file [" + sf.getPath() + "], pushing to L0"); insertFileIntoStripe(level0Files, sf); // Bad metadata - goes to L0 also. ensureLevel0Metadata(sf); } else { ArrayList<StoreFile> stripe = candidateStripes.get(endRow); if (stripe == null) { stripe = new ArrayList<StoreFile>(); candidateStripes.put(endRow, stripe); } insertFileIntoStripe(stripe, sf); } } // Possible improvement - for variable-count stripes, if all the files are in L0, we can // instead create single, open-ended stripe with all files. boolean hasOverlaps = false; byte[] expectedStartRow = null; // first stripe can start wherever Iterator<Map.Entry<byte[], ArrayList<StoreFile>>> entryIter = candidateStripes.entrySet().iterator(); while (entryIter.hasNext()) { Map.Entry<byte[], ArrayList<StoreFile>> entry = entryIter.next(); ArrayList<StoreFile> files = entry.getValue(); // Validate the file start rows, and remove the bad ones to level 0. for (int i = 0; i < files.size(); ++i) { StoreFile sf = files.get(i); byte[] startRow = startOf(sf); if (expectedStartRow == null) { expectedStartRow = startRow; // ensure that first stripe is still consistent } else if (!rowEquals(expectedStartRow, startRow)) { hasOverlaps = true; LOG.warn("Store file doesn't fit into the tentative stripes - expected to start at [" + Bytes.toString(expectedStartRow) + "], but starts at [" + Bytes.toString(startRow) + "], to L0 it goes"); StoreFile badSf = files.remove(i); insertFileIntoStripe(level0Files, badSf); ensureLevel0Metadata(badSf); --i; } } // Check if any files from the candidate stripe are valid. If so, add a stripe. byte[] endRow = entry.getKey(); if (!files.isEmpty()) { expectedStartRow = endRow; // Next stripe must start exactly at that key. } else { entryIter.remove(); } } // In the end, there must be open ends on two sides. If not, and there were no errors i.e. // files are consistent, they might be coming from a split. We will treat the boundaries // as open keys anyway, and log the message. // If there were errors, we'll play it safe and dump everything into L0. 
if (!candidateStripes.isEmpty()) { StoreFile firstFile = candidateStripes.firstEntry().getValue().get(0); boolean isOpen = isOpen(startOf(firstFile)) && isOpen(candidateStripes.lastKey()); if (!isOpen) { LOG.warn("The range of the loaded files does not cover full key space: from [" + Bytes.toString(startOf(firstFile)) + "], to [" + Bytes.toString(candidateStripes.lastKey()) + "]"); if (!hasOverlaps) { ensureEdgeStripeMetadata(candidateStripes.firstEntry().getValue(), true); ensureEdgeStripeMetadata(candidateStripes.lastEntry().getValue(), false); } else { LOG.warn("Inconsistent files, everything goes to L0."); for (ArrayList<StoreFile> files : candidateStripes.values()) { for (StoreFile sf : files) { insertFileIntoStripe(level0Files, sf); ensureLevel0Metadata(sf); } } candidateStripes.clear(); } } } // Copy the results into the fields. State state = new State(); state.level0Files = ImmutableList.copyOf(level0Files); state.stripeFiles = new ArrayList<ImmutableList<StoreFile>>(candidateStripes.size()); state.stripeEndRows = new byte[Math.max(0, candidateStripes.size() - 1)][]; ArrayList<StoreFile> newAllFiles = new ArrayList<StoreFile>(level0Files); int i = candidateStripes.size() - 1; for (Map.Entry<byte[], ArrayList<StoreFile>> entry : candidateStripes.entrySet()) { state.stripeFiles.add(ImmutableList.copyOf(entry.getValue())); newAllFiles.addAll(entry.getValue()); if (i > 0) { state.stripeEndRows[state.stripeFiles.size() - 1] = entry.getKey(); } --i; } state.allFilesCached = ImmutableList.copyOf(newAllFiles); this.state = state; debugDumpState("Files loaded"); }
From source file:io.warp10.continuum.gts.GTSHelper.java
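chunks is keyed by chunk end timestamp; when an overlap is requested, firstEntry() and lastEntry() locate the boundary chunks so an extra empty chunk can be cloned on each side before the overlapping datapoints are distributed.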
public static List<GeoTimeSerie> chunk(GeoTimeSerie gts, long lastchunk, long chunkwidth, long chunkcount, String chunklabel, boolean keepempty, long overlap) throws WarpScriptException { if (overlap < 0 || overlap > chunkwidth) { throw new WarpScriptException("Overlap cannot exceed chunk width."); }//www . j av a 2 s . c o m // // Check if 'chunklabel' exists in the GTS labels // Metadata metadata = gts.getMetadata(); if (metadata.getLabels().containsKey(chunklabel)) { throw new WarpScriptException( "Cannot operate on Geo Time Series which already have a label named '" + chunklabel + "'"); } TreeMap<Long, GeoTimeSerie> chunks = new TreeMap<Long, GeoTimeSerie>(); // // If GTS is bucketized, make sure bucketspan is less than boxwidth // boolean bucketized = GTSHelper.isBucketized(gts); if (bucketized) { if (gts.bucketspan > chunkwidth) { throw new WarpScriptException( "Cannot operate on Geo Time Series with a bucketspan greater than the chunk width."); } } else { // GTS is not bucketized and has 0 values, if lastchunk was 0, return an empty list as we // are unable to produce chunks if (0 == gts.values && 0L == lastchunk) { return new ArrayList<GeoTimeSerie>(); } } // // Set chunkcount to Integer.MAX_VALUE if it's 0 // boolean zeroChunkCount = false; if (0 == chunkcount) { chunkcount = Integer.MAX_VALUE; zeroChunkCount = true; } // // Sort timestamps in reverse order so we can produce all chunks in O(n) // GTSHelper.sort(gts, true); // // Loop on the chunks // // Index in the timestamp array int idx = 0; long bucketspan = gts.bucketspan; int bucketcount = gts.bucketcount; long lastbucket = gts.lastbucket; // // If lastchunk is 0, use lastbucket or the most recent tick // if (0 == lastchunk) { if (isBucketized(gts)) { lastchunk = lastbucket; } else { // Use the most recent tick lastchunk = gts.ticks[0]; // Make sure lastchunk is aligned on 'chunkwidth' boundary if (0 != (lastchunk % chunkwidth)) { lastchunk = lastchunk - (lastchunk % chunkwidth) + chunkwidth; } } } for (long i = 0; i < chunkcount; i++) { // If we have no more values and were not specified a chunk count, exit the loop, we're done if (idx >= gts.values && zeroChunkCount) { break; } // Compute chunk bounds long chunkend = lastchunk - i * chunkwidth; long chunkstart = chunkend - chunkwidth + 1; GeoTimeSerie chunkgts = new GeoTimeSerie(lastbucket, bucketcount, bucketspan, 16); // Set metadata for the GTS chunkgts.setMetadata(metadata); // Add 'chunklabel' chunkgts.getMetadata().putToLabels(chunklabel, Long.toString(chunkend)); if (bucketized) { // Chunk is outside the GTS, it will be empty if (lastbucket < chunkstart || chunkend <= lastbucket - (bucketcount * bucketspan)) { // Add the (empty) chunk if keepempty is true if (keepempty || overlap > 0) { chunks.put(chunkend, chunkgts); } continue; } // Set the bucketized parameters in the GTS // If bucketspan does not divide chunkwidth, chunks won't be bucketized if (0 == chunkwidth % bucketspan) { chunkgts.bucketspan = bucketspan; chunkgts.lastbucket = chunkend; chunkgts.bucketcount = (int) ((chunkend - chunkstart + 1) / bucketspan); } else { chunkgts.bucketspan = 0L; chunkgts.lastbucket = 0L; chunkgts.bucketspan = 0; } } // // Add the datapoints which fall within the current chunk // // Advance until the current tick is before 'chunkend' while (idx < gts.values && gts.ticks[idx] > chunkend) { idx++; } // We've exhausted the values if (idx >= gts.values) { // only add chunk if it's not empty or empty with 'keepempty' set to true if (0 != chunkgts.values || (keepempty || overlap > 
0)) { chunks.put(chunkend, chunkgts); } continue; } // The current tick is before the beginning of the current chunk if (gts.ticks[idx] < chunkstart) { // only add chunk if it's not empty or empty with 'keepempty' set to true if (0 != chunkgts.values || (keepempty || overlap > 0)) { chunks.put(chunkend, chunkgts); } continue; } while (idx < gts.values && gts.ticks[idx] >= chunkstart) { GTSHelper.setValue(chunkgts, GTSHelper.tickAtIndex(gts, idx), GTSHelper.locationAtIndex(gts, idx), GTSHelper.elevationAtIndex(gts, idx), GTSHelper.valueAtIndex(gts, idx), false); idx++; } // only add chunk if it's not empty or empty with 'keepempty' set to true if (0 != chunkgts.values || (keepempty || overlap > 0)) { chunks.put(chunkend, chunkgts); } } // // Handle overlapping is need be. // We need to iterate over all ticks and add datapoints to each GTS they belong to // if (overlap > 0) { // // Check if we need to add a first and a last chunk // long ts = GTSHelper.tickAtIndex(gts, 0); if (ts <= chunks.firstKey() - chunkwidth) { Entry<Long, GeoTimeSerie> currentFirst = chunks.firstEntry(); GeoTimeSerie firstChunk = currentFirst.getValue().cloneEmpty(); if (GTSHelper.isBucketized(currentFirst.getValue())) { firstChunk.lastbucket = firstChunk.lastbucket - firstChunk.bucketspan; } chunks.put(currentFirst.getKey() - chunkwidth, firstChunk); } ts = GTSHelper.tickAtIndex(gts, gts.values - 1); if (ts >= chunks.lastKey() - chunkwidth + 1 - overlap) { Entry<Long, GeoTimeSerie> currentLast = chunks.lastEntry(); GeoTimeSerie lastChunk = currentLast.getValue().cloneEmpty(); if (GTSHelper.isBucketized(currentLast.getValue())) { lastChunk.lastbucket = lastChunk.lastbucket + lastChunk.bucketspan; } chunks.put(currentLast.getKey() + chunkwidth, lastChunk); } // // Put all entries in a list so we can access them randomly // List<Entry<Long, GeoTimeSerie>> allchunks = new ArrayList<Entry<Long, GeoTimeSerie>>(chunks.entrySet()); int[] currentSizes = new int[allchunks.size()]; for (int i = 0; i < currentSizes.length; i++) { currentSizes[i] = allchunks.get(i).getValue().values; } // // Iterate over chunks, completing with prev and next overlaps // Remember the timestamps are in reverse order so far. // for (int i = 0; i < allchunks.size(); i++) { GeoTimeSerie current = allchunks.get(i).getValue(); long lowerBound = allchunks.get(i).getKey() - chunkwidth + 1 - overlap; long upperBound = allchunks.get(i).getKey() + overlap; if (i > 0) { GeoTimeSerie prev = allchunks.get(i - 1).getValue(); for (int j = 0; j < currentSizes[i - 1]; j++) { long timestamp = GTSHelper.tickAtIndex(prev, j); if (timestamp < lowerBound) { break; } GTSHelper.setValue(current, timestamp, GTSHelper.locationAtIndex(prev, j), GTSHelper.elevationAtIndex(prev, j), GTSHelper.valueAtIndex(prev, j), false); } } if (i < allchunks.size() - 1) { GeoTimeSerie next = allchunks.get(i + 1).getValue(); for (int j = currentSizes[i + 1] - 1; j >= 0; j--) { long timestamp = GTSHelper.tickAtIndex(next, j); if (timestamp > upperBound) { break; } GTSHelper.setValue(current, timestamp, GTSHelper.locationAtIndex(next, j), GTSHelper.elevationAtIndex(next, j), GTSHelper.valueAtIndex(next, j), false); } } } } List<GeoTimeSerie> result = new ArrayList<GeoTimeSerie>(); for (GeoTimeSerie g : chunks.values()) { if (!keepempty && 0 == g.values) { continue; } result.add(g); } return result; }
From source file:org.ncic.bioinfo.sparkseq.algorithms.walker.mutect.Mutect.java
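Candidate mutations are keyed by their initial tumor LOD score, so lastEntry() picks the highest-scoring candidate as the one to report.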
@Override protected void map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext rawContext) { final char upRef = Character.toUpperCase((char) ref.getBase()); if (upRef != 'A' && upRef != 'C' && upRef != 'G' && upRef != 'T') { return;//from w w w. j a v a2s .co m } ReadBackedPileup tumorPileup = cleanNoneRefPileupElement(rawContext.getBasePileup()); ReadBackedPileup normalPileup = cleanNoneRefPileupElement(normalSamTraverser.next().getBasePileup()); // an optimization to speed things up when there is no coverage if (tumorPileup.depthOfCoverage() == 0 && normalPileup.depthOfCoverage() == 0) { return; } TreeMap<Double, CandidateMutation> messageByTumorLod = new TreeMap<Double, CandidateMutation>(); // get sequence context around mutation String sequenceContext = SequenceUtils.createSequenceContext(this.refContentProvider, ref, 3); try { final LocusReadPile tumorReadPile = new LocusReadPile(tumorPileup, upRef, MTAC.MIN_QSCORE, MIN_QSUM_QSCORE, false, MTAC.ARTIFACT_DETECTION_MODE, MTAC.ENABLE_QSCORE_OUTPUT); final LocusReadPile normalReadPile = new LocusReadPile(normalPileup, upRef, MTAC.MIN_QSCORE, 0, this.USE_MAPQ0_IN_NORMAL_QSCORE, true, MTAC.ENABLE_QSCORE_OUTPUT); Collection<VariantContext> panelOfNormalsVC = tracker.getValues(normalPanelRod, rawContext.getLocation()); Collection<VariantContext> cosmicVC = getVCInTrackerInLocus(RODNames.COSMIC, tracker); Collection<VariantContext> dbsnpVC = getVCInTrackerInLocus(RODNames.DBSNP, tracker); // remove the effect of cosmic from dbSNP boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty()); // compute coverage flags int tumorCoveredDepthThreshold = 14; int normalCoveredDepthThreshold = (germlineAtRisk) ? 19 : 8; if (!hasNormalBam) { normalCoveredDepthThreshold = 0; } int tumorBaseCount = tumorReadPile.finalPileupReads.size(); int normalBaseCount = normalReadPile.finalPileupReads.size(); boolean isTumorCovered = tumorBaseCount >= tumorCoveredDepthThreshold; boolean isNormalCovered = normalBaseCount >= normalCoveredDepthThreshold; boolean isBaseCovered = isTumorCovered && isNormalCovered; if (!hasNormalBam) { isBaseCovered = isTumorCovered; } int tumorQ20BaseCount = tumorReadPile.getFilteredBaseCount(20); int normalQ20BaseCount = normalReadPile.getFilteredBaseCount(20); // calculate power double tumorPower = tumorPowerCalculator.cachingPowerCalculation(tumorBaseCount, MTAC.POWER_CONSTANT_AF); double normalPowerNoSNPPrior = normalNovelSitePowerCalculator.cachingPowerCalculation(normalBaseCount); double normalPowerWithSNPPrior = normalDbSNPSitePowerCalculator .cachingPowerCalculation(normalBaseCount); double normalPower = (germlineAtRisk) ? 
normalPowerWithSNPPrior : normalPowerNoSNPPrior; double combinedPower = tumorPower * normalPower; if (!hasNormalBam) { combinedPower = tumorPower; } int mapQ0Reads = tumorReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads() + normalReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads(); int totalReads = tumorReadPile.qualityScoreFilteredPileup.depthOfCoverage() + normalReadPile.qualityScoreFilteredPileup.depthOfCoverage(); // Test each of the possible alternate alleles for (final char altAllele : new char[] { 'A', 'C', 'G', 'T' }) { if (altAllele == upRef) { continue; } if (!MTAC.FORCE_OUTPUT && tumorReadPile.qualitySums.getCounts(altAllele) == 0) { continue; } CandidateMutation candidate = new CandidateMutation(rawContext.getLocation(), upRef); candidate.setSequenceContext(sequenceContext); candidate.setTumorSampleName(MTAC.TUMOR_SAMPLE_NAME); candidate.setNormalSampleName(MTAC.NORMAL_SAMPLE_NAME); candidate.setCovered(isBaseCovered); candidate.setPower(combinedPower); candidate.setTumorPower(tumorPower); candidate.setNormalPower(normalPower); candidate.setNormalPowerWithSNPPrior(normalPowerWithSNPPrior); candidate.setNormalPowerNoSNPPrior(normalPowerNoSNPPrior); candidate.setTumorQ20Count(tumorQ20BaseCount); candidate.setNormalQ20Count(normalQ20BaseCount); candidate.setInitialTumorNonRefQualitySum(tumorReadPile.qualitySums.getOtherQualities(upRef)); candidate.setAltAllele(altAllele); candidate.setMapQ0Reads(mapQ0Reads); candidate.setTotalReads(totalReads); candidate.setContaminationFraction(MTAC.FRACTION_CONTAMINATION); candidate.setPanelOfNormalsVC( panelOfNormalsVC.isEmpty() ? null : panelOfNormalsVC.iterator().next()); // if there are multiple, we're just grabbing the first candidate.setCosmicSite(!cosmicVC.isEmpty()); candidate.setDbsnpSite(!dbsnpVC.isEmpty()); candidate.setDbsnpVC(dbsnpVC.isEmpty() ? 
null : dbsnpVC.iterator().next()); candidate.setTumorF(tumorReadPile.estimateAlleleFraction(upRef, altAllele)); if (!MTAC.FORCE_OUTPUT && candidate.getTumorF() < MTAC.TUMOR_F_PRETEST) { continue; } candidate.setInitialTumorAltCounts(tumorReadPile.qualitySums.getCounts(altAllele)); candidate.setInitialTumorRefCounts(tumorReadPile.qualitySums.getCounts(upRef)); candidate.setInitialTumorAltQualitySum(tumorReadPile.qualitySums.getQualitySum(altAllele)); candidate.setInitialTumorRefQualitySum(tumorReadPile.qualitySums.getQualitySum(upRef)); double tumorLod = tumorReadPile.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0); candidate.setTumorLodFStar(tumorLod); candidate.setInitialTumorReadDepth(tumorReadPile.finalPileupReads.size()); candidate.setTumorInsertionCount(tumorReadPile.getInsertionsCount()); candidate.setTumorDeletionCount(tumorReadPile.getDeletionsCount()); if (candidate.getTumorLodFStar() < MTAC.INITIAL_TUMOR_LOD_THRESHOLD) { continue; } // calculate lod of contaminant double contaminantF = Math.min(contaminantAlternateFraction, candidate.getTumorF()); VariableAllelicRatioGenotypeLikelihoods contaminantLikelihoods = new VariableAllelicRatioGenotypeLikelihoods( upRef, contaminantF); List<PileupElement> peList = new ArrayList<PileupElement>( tumorReadPile.finalPileup.depthOfCoverage()); for (PileupElement pe : tumorReadPile.finalPileup) { peList.add(pe); } Collections.sort(peList, new PileupComparatorByAltRefQual((byte) altAllele)); int readsToKeep = (int) (peList.size() * contaminantAlternateFraction); for (PileupElement pe : peList) { byte base = pe.getBase(); if (pe.getBase() == altAllele) { // if we've retained all we need, then turn the remainder of alts to ref if (readsToKeep == 0) { base = (byte) upRef; } else { readsToKeep--; } } contaminantLikelihoods.add(base, pe.getQual()); } double[] refHetHom = LocusReadPile.extractRefHetHom(contaminantLikelihoods, upRef, altAllele); double contaminantLod = refHetHom[1] - refHetHom[0]; candidate.setContaminantLod(contaminantLod); final QualitySums normQs = normalReadPile.qualitySums; VariableAllelicRatioGenotypeLikelihoods normalGl = normalReadPile .calculateLikelihoods(normalReadPile.qualityScoreFilteredPileup); // use MAPQ0 reads candidate.setInitialNormalBestGenotype(normalReadPile.getBestGenotype(normalGl)); candidate.setInitialNormalLod(LocusReadPile.getRefVsAlt(normalGl, upRef, altAllele)); double normalF = Math.max(LocusReadPile .estimateAlleleFraction(normalReadPile.qualityScoreFilteredPileup, upRef, altAllele), MTAC.MINIMUM_NORMAL_ALLELE_FRACTION); candidate.setNormalF(normalF); candidate.setInitialNormalAltQualitySum(normQs.getQualitySum(altAllele)); candidate.setInitialNormalRefQualitySum(normQs.getQualitySum(upRef)); candidate.setNormalAltQualityScores(normQs.getBaseQualityScores(altAllele)); candidate.setNormalRefQualityScores(normQs.getBaseQualityScores(upRef)); candidate.setInitialNormalAltCounts(normQs.getCounts(altAllele)); candidate.setInitialNormalRefCounts(normQs.getCounts(upRef)); candidate.setInitialNormalReadDepth(normalReadPile.finalPileupReads.size()); // TODO: parameterize filtering Mate-Rescued Reads (if someone wants to disable this) final LocusReadPile t2 = filterReads(ref, tumorReadPile.finalPileup, true); // if there are no reads remaining, abandon this theory if (!MTAC.FORCE_OUTPUT && t2.finalPileupReads.size() == 0) { continue; } candidate.setInitialTumorAltCounts(t2.qualitySums.getCounts(altAllele)); candidate.setInitialTumorRefCounts(t2.qualitySums.getCounts(upRef)); 
candidate.setInitialTumorAltQualitySum(t2.qualitySums.getQualitySum(altAllele)); candidate.setInitialTumorRefQualitySum(t2.qualitySums.getQualitySum(upRef)); candidate.setTumorAltQualityScores(t2.qualitySums.getBaseQualityScores(altAllele)); candidate.setTumorRefQualityScores(t2.qualitySums.getBaseQualityScores(upRef)); VariableAllelicRatioGenotypeLikelihoods t2Gl = t2.calculateLikelihoods(t2.finalPileup); candidate.setInitialTumorLod(t2.getAltVsRef(t2Gl, upRef, altAllele)); candidate.setInitialTumorReadDepth(t2.finalPileupReads.size()); candidate.setTumorF(t2.estimateAlleleFraction(upRef, altAllele)); double tumorLod2 = t2.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0); candidate.setTumorLodFStar(tumorLod2); //TODO: clean up use of forward/reverse vs positive/negative (prefer the latter since GATK uses it) ReadBackedPileup forwardPileup = filterReads(ref, tumorReadPile.finalPileupPositiveStrand, true).finalPileupPositiveStrand; double f2forward = LocusReadPile.estimateAlleleFraction(forwardPileup, upRef, altAllele); candidate.setTumorLodFStarForward( t2.calculateAltVsRefLOD(forwardPileup, (byte) altAllele, f2forward, 0.0)); ReadBackedPileup reversePileup = filterReads(ref, tumorReadPile.finalPileupNegativeStrand, true).finalPileupNegativeStrand; double f2reverse = LocusReadPile.estimateAlleleFraction(reversePileup, upRef, altAllele); candidate.setTumorLodFStarReverse( t2.calculateAltVsRefLOD(reversePileup, (byte) altAllele, f2reverse, 0.0)); // calculate strand bias power candidate.setPowerToDetectPositiveStrandArtifact(strandArtifactPowerCalculator .cachingPowerCalculation(reversePileup.depthOfCoverage(), candidate.getTumorF())); candidate.setPowerToDetectNegativeStrandArtifact(strandArtifactPowerCalculator .cachingPowerCalculation(forwardPileup.depthOfCoverage(), candidate.getTumorF())); candidate.setStrandContingencyTable(SequenceUtils.getStrandContingencyTable(forwardPileup, reversePileup, (byte) upRef, (byte) altAllele)); ArrayList<PileupElement> mutantPileupElements = new ArrayList<PileupElement>(); ArrayList<PileupElement> referencePileupElements = new ArrayList<PileupElement>(); for (PileupElement p : t2.finalPileup) { final SAMRecord read = p.getRead(); final int offset = p.getOffset(); if (read.getReadString().charAt(offset) == altAllele) { mutantPileupElements.add(p); } else if (read.getReadString().charAt(offset) == upRef) { referencePileupElements.add(p); } else { // just drop the read... } } ReadBackedPileup mutantPileup = new ReadBackedPileupImpl(rawContext.getLocation(), mutantPileupElements); ReadBackedPileup referencePileup = new ReadBackedPileupImpl(rawContext.getLocation(), referencePileupElements); // TODO: shouldn't this be refAllele here? final LocusReadPile mutantPile = new LocusReadPile(mutantPileup, altAllele, 0, 0, MTAC.ENABLE_QSCORE_OUTPUT); final LocusReadPile refPile = new LocusReadPile(referencePileup, altAllele, 0, 0, MTAC.ENABLE_QSCORE_OUTPUT); // Set the maximum observed mapping quality score for the reference and alternate alleles int[] rmq = referencePileup.getMappingQuals(); candidate.setTumorRefMaxMapQ((rmq.length == 0) ? 0 : NumberUtils.max(rmq)); int[] amq = mutantPileup.getMappingQuals(); candidate.setTumorAltMaxMapQ((amq.length == 0) ? 
0 : NumberUtils.max(amq)); // start with just the tumor pile candidate.setTumorAltForwardOffsetsInRead(SequenceUtils.getForwardOffsetsInRead(mutantPileup)); candidate.setTumorAltReverseOffsetsInRead(SequenceUtils.getReverseOffsetsInRead(mutantPileup)); if (candidate.getTumorAltForwardOffsetsInRead().size() > 0) { double[] offsets = MuTectStats .convertIntegersToDoubles(candidate.getTumorAltForwardOffsetsInRead()); double median = MuTectStats.getMedian(offsets); candidate.setTumorForwardOffsetsInReadMedian(median); candidate.setTumorForwardOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median)); } if (candidate.getTumorAltReverseOffsetsInRead().size() > 0) { double[] offsets = MuTectStats .convertIntegersToDoubles(candidate.getTumorAltReverseOffsetsInRead()); double median = MuTectStats.getMedian(offsets); candidate.setTumorReverseOffsetsInReadMedian(median); candidate.setTumorReverseOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median)); } // test to see if the candidate should be rejected performRejection(candidate); messageByTumorLod.put(candidate.getInitialTumorLod(), candidate); } // if more than one site passes the tumor lod threshold for KEEP the fail the tri_allelic Site filter int passingCandidates = 0; for (CandidateMutation c : messageByTumorLod.values()) { if (c.getTumorLodFStar() >= MTAC.TUMOR_LOD_THRESHOLD) { passingCandidates++; } } if (passingCandidates > 1) { for (CandidateMutation c : messageByTumorLod.values()) { c.addRejectionReason("triallelic_site"); } } // write out the call stats for the "best" candidate if (!messageByTumorLod.isEmpty()) { CandidateMutation m = messageByTumorLod.lastEntry().getValue(); // only output passing calls OR rejected sites if ONLY_PASSING_CALLS is not specified if (!m.isRejected() || (m.isRejected() && !MTAC.ONLY_PASSING_CALLS)) { //out.println(callStatsGenerator.generateCallStats(m)); resultVCFOutInfos.add(callStatsGenerator.generateCallStats(m)); resultVCFRecords.add(VCFGenerator.generateVC(m)); } } } catch (MathException me) { throw new GATKException(me.getMessage()); } }
From source file:org.broadinstitute.cga.tools.gatk.walkers.cancer.mutect.MuTect.java
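Same pattern as the previous example: lastEntry() on the LOD-keyed TreeMap selects the best-scoring candidate mutation for call-stats and VCF output.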
@Override public Integer map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext rawContext) { if (MTAC.NOOP) return 0; TreeMap<Double, CandidateMutation> messageByTumorLod = new TreeMap<Double, CandidateMutation>(); ReadBackedPileup pileup = rawContext.getBasePileup(); int numberOfReads = pileup.depthOfCoverage(); binReadsProcessed += numberOfReads;/*from w w w.j ava 2 s. c o m*/ if (binReadsProcessed >= 1000000) { long time = System.currentTimeMillis(); long elapsedTime = time - lastTime; lastTime = time; totalReadsProcessed += binReadsProcessed; binReadsProcessed = 0; logger.info(String.format("[MUTECT] Processed %d reads in %d ms", totalReadsProcessed, elapsedTime)); } // an optimization to speed things up when there is no coverage if (!MTAC.FORCE_OUTPUT && numberOfReads == 0) { return -1; } // get sequence context around mutation String sequenceContext = SequenceUtils.createSequenceContext(ref, 3); // only process bases where the reference is [ACGT], because the FASTA for HG18 has N,M and R! final char upRef = Character.toUpperCase(ref.getBaseAsChar()); if (upRef != 'A' && upRef != 'C' && upRef != 'G' && upRef != 'T') { return -1; } try { Map<SampleType, ReadBackedPileup> pileupMap = getPileupsBySampleType(pileup); final LocusReadPile tumorReadPile = new LocusReadPile(pileupMap.get(SampleType.TUMOR), upRef, MTAC.MIN_QSCORE, MIN_QSUM_QSCORE, false, MTAC.ARTIFACT_DETECTION_MODE, MTAC.ENABLE_QSCORE_OUTPUT); final LocusReadPile normalReadPile = new LocusReadPile(pileupMap.get(SampleType.NORMAL), upRef, MTAC.MIN_QSCORE, 0, this.USE_MAPQ0_IN_NORMAL_QSCORE, true, MTAC.ENABLE_QSCORE_OUTPUT); Collection<VariantContext> panelOfNormalsVC = tracker.getValues(normalPanelRod, rawContext.getLocation()); Collection<VariantContext> cosmicVC = tracker.getValues(cosmicRod, rawContext.getLocation()); Collection<VariantContext> dbsnpVC = tracker.getValues(dbsnpRod, rawContext.getLocation()); // remove the effect of cosmic from dbSNP boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty()); // compute coverage flags int tumorCoveredDepthThreshold = 14; int normalCoveredDepthThreshold = (germlineAtRisk) ? 19 : 8; if (!hasNormalBam) { normalCoveredDepthThreshold = 0; } int tumorBaseCount = tumorReadPile.finalPileupReads.size(); int normalBaseCount = normalReadPile.finalPileupReads.size(); boolean isTumorCovered = tumorBaseCount >= tumorCoveredDepthThreshold; boolean isNormalCovered = normalBaseCount >= normalCoveredDepthThreshold; boolean isBaseCovered = isTumorCovered && isNormalCovered; if (!hasNormalBam) { isBaseCovered = isTumorCovered; } stdCovWriter.writeCoverage(rawContext, isBaseCovered); int tumorQ20BaseCount = tumorReadPile.getFilteredBaseCount(20); int normalQ20BaseCount = normalReadPile.getFilteredBaseCount(20); q20CovWriter.writeCoverage(rawContext, tumorQ20BaseCount >= 20 && normalQ20BaseCount >= 20); tumorDepthWriter.writeCoverage(rawContext, tumorBaseCount); normalDepthWriter.writeCoverage(rawContext, normalBaseCount); // calculate power double tumorPower = tumorPowerCalculator.cachingPowerCalculation(tumorBaseCount, MTAC.POWER_CONSTANT_AF); double normalPowerNoSNPPrior = normalNovelSitePowerCalculator.cachingPowerCalculation(normalBaseCount); double normalPowerWithSNPPrior = normalDbSNPSitePowerCalculator .cachingPowerCalculation(normalBaseCount); double normalPower = (germlineAtRisk) ? 
normalPowerWithSNPPrior : normalPowerNoSNPPrior; double combinedPower = tumorPower * normalPower; if (!hasNormalBam) { combinedPower = tumorPower; } powerWriter.writeCoverage(rawContext, combinedPower); int mapQ0Reads = tumorReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads() + normalReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads(); int totalReads = tumorReadPile.qualityScoreFilteredPileup.depthOfCoverage() + normalReadPile.qualityScoreFilteredPileup.depthOfCoverage(); // Test each of the possible alternate alleles for (final char altAllele : new char[] { 'A', 'C', 'G', 'T' }) { if (altAllele == upRef) { continue; } if (!MTAC.FORCE_OUTPUT && tumorReadPile.qualitySums.getCounts(altAllele) == 0) { continue; } CandidateMutation candidate = new CandidateMutation(rawContext.getLocation(), upRef); candidate.setSequenceContext(sequenceContext); candidate.setTumorSampleName(MTAC.TUMOR_SAMPLE_NAME); candidate.setNormalSampleName(MTAC.NORMAL_SAMPLE_NAME); candidate.setCovered(isBaseCovered); candidate.setPower(combinedPower); candidate.setTumorPower(tumorPower); candidate.setNormalPower(normalPower); candidate.setNormalPowerWithSNPPrior(normalPowerWithSNPPrior); candidate.setNormalPowerNoSNPPrior(normalPowerNoSNPPrior); candidate.setTumorQ20Count(tumorQ20BaseCount); candidate.setNormalQ20Count(normalQ20BaseCount); candidate.setInitialTumorNonRefQualitySum(tumorReadPile.qualitySums.getOtherQualities(upRef)); candidate.setAltAllele(altAllele); candidate.setMapQ0Reads(mapQ0Reads); candidate.setTotalReads(totalReads); candidate.setContaminationFraction(MTAC.FRACTION_CONTAMINATION); candidate.setPanelOfNormalsVC( panelOfNormalsVC.isEmpty() ? null : panelOfNormalsVC.iterator().next()); // if there are multiple, we're just grabbing the first candidate.setCosmicSite(!cosmicVC.isEmpty()); candidate.setDbsnpSite(!dbsnpVC.isEmpty()); candidate.setDbsnpVC(dbsnpVC.isEmpty() ? 
null : dbsnpVC.iterator().next()); candidate.setTumorF(tumorReadPile.estimateAlleleFraction(upRef, altAllele)); if (!MTAC.FORCE_OUTPUT && candidate.getTumorF() < MTAC.TUMOR_F_PRETEST) { continue; } if (++candidatesInspected % 1000 == 0) { logger.info(String.format("[MUTECT] Inspected %d potential candidates", candidatesInspected)); } candidate.setInitialTumorAltCounts(tumorReadPile.qualitySums.getCounts(altAllele)); candidate.setInitialTumorRefCounts(tumorReadPile.qualitySums.getCounts(upRef)); candidate.setInitialTumorAltQualitySum(tumorReadPile.qualitySums.getQualitySum(altAllele)); candidate.setInitialTumorRefQualitySum(tumorReadPile.qualitySums.getQualitySum(upRef)); double tumorLod = tumorReadPile.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0); candidate.setTumorLodFStar(tumorLod); candidate.setInitialTumorReadDepth(tumorReadPile.finalPileupReads.size()); candidate.setTumorInsertionCount(tumorReadPile.getInsertionsCount()); candidate.setTumorDeletionCount(tumorReadPile.getDeletionsCount()); if (candidate.getTumorLodFStar() < MTAC.INITIAL_TUMOR_LOD_THRESHOLD) { continue; } // calculate lod of contaminant double contaminantF = Math.min(contaminantAlternateFraction, candidate.getTumorF()); VariableAllelicRatioGenotypeLikelihoods contaminantLikelihoods = new VariableAllelicRatioGenotypeLikelihoods( upRef, contaminantF); List<PileupElement> peList = new ArrayList<PileupElement>( tumorReadPile.finalPileup.depthOfCoverage()); for (PileupElement pe : tumorReadPile.finalPileup) { peList.add(pe); } Collections.sort(peList, new PileupComparatorByAltRefQual((byte) altAllele)); int readsToKeep = (int) (peList.size() * contaminantAlternateFraction); for (PileupElement pe : peList) { byte base = pe.getBase(); if (pe.getBase() == altAllele) { // if we've retained all we need, then turn the remainder of alts to ref if (readsToKeep == 0) { base = (byte) upRef; } else { readsToKeep--; } } contaminantLikelihoods.add(base, pe.getQual()); } double[] refHetHom = LocusReadPile.extractRefHetHom(contaminantLikelihoods, upRef, altAllele); double contaminantLod = refHetHom[1] - refHetHom[0]; candidate.setContaminantLod(contaminantLod); final QualitySums normQs = normalReadPile.qualitySums; VariableAllelicRatioGenotypeLikelihoods normalGl = normalReadPile .calculateLikelihoods(normalReadPile.qualityScoreFilteredPileup); // use MAPQ0 reads candidate.setInitialNormalBestGenotype(normalReadPile.getBestGenotype(normalGl)); candidate.setInitialNormalLod(LocusReadPile.getRefVsAlt(normalGl, upRef, altAllele)); double normalF = Math.max(LocusReadPile .estimateAlleleFraction(normalReadPile.qualityScoreFilteredPileup, upRef, altAllele), MTAC.MINIMUM_NORMAL_ALLELE_FRACTION); candidate.setNormalF(normalF); candidate.setInitialNormalAltQualitySum(normQs.getQualitySum(altAllele)); candidate.setInitialNormalRefQualitySum(normQs.getQualitySum(upRef)); candidate.setNormalAltQualityScores(normQs.getBaseQualityScores(altAllele)); candidate.setNormalRefQualityScores(normQs.getBaseQualityScores(upRef)); candidate.setInitialNormalAltCounts(normQs.getCounts(altAllele)); candidate.setInitialNormalRefCounts(normQs.getCounts(upRef)); candidate.setInitialNormalReadDepth(normalReadPile.finalPileupReads.size()); // TODO: parameterize filtering Mate-Rescued Reads (if someone wants to disable this) final LocusReadPile t2 = filterReads(ref, tumorReadPile.finalPileup, true); // if there are no reads remaining, abandon this theory if (!MTAC.FORCE_OUTPUT && t2.finalPileupReads.size() == 0) { continue; } 
candidate.setInitialTumorAltCounts(t2.qualitySums.getCounts(altAllele)); candidate.setInitialTumorRefCounts(t2.qualitySums.getCounts(upRef)); candidate.setInitialTumorAltQualitySum(t2.qualitySums.getQualitySum(altAllele)); candidate.setInitialTumorRefQualitySum(t2.qualitySums.getQualitySum(upRef)); candidate.setTumorAltQualityScores(t2.qualitySums.getBaseQualityScores(altAllele)); candidate.setTumorRefQualityScores(t2.qualitySums.getBaseQualityScores(upRef)); VariableAllelicRatioGenotypeLikelihoods t2Gl = t2.calculateLikelihoods(t2.finalPileup); candidate.setInitialTumorLod(t2.getAltVsRef(t2Gl, upRef, altAllele)); candidate.setInitialTumorReadDepth(t2.finalPileupReads.size()); candidate.setTumorF(t2.estimateAlleleFraction(upRef, altAllele)); double tumorLod2 = t2.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0); candidate.setTumorLodFStar(tumorLod2); //TODO: clean up use of forward/reverse vs positive/negative (prefer the latter since GATK uses it) ReadBackedPileup forwardPileup = filterReads(ref, tumorReadPile.finalPileupPositiveStrand, true).finalPileupPositiveStrand; double f2forward = LocusReadPile.estimateAlleleFraction(forwardPileup, upRef, altAllele); candidate.setTumorLodFStarForward( t2.calculateAltVsRefLOD(forwardPileup, (byte) altAllele, f2forward, 0.0)); ReadBackedPileup reversePileup = filterReads(ref, tumorReadPile.finalPileupNegativeStrand, true).finalPileupNegativeStrand; double f2reverse = LocusReadPile.estimateAlleleFraction(reversePileup, upRef, altAllele); candidate.setTumorLodFStarReverse( t2.calculateAltVsRefLOD(reversePileup, (byte) altAllele, f2reverse, 0.0)); // calculate strand bias power candidate.setPowerToDetectPositiveStrandArtifact(strandArtifactPowerCalculator .cachingPowerCalculation(reversePileup.depthOfCoverage(), candidate.getTumorF())); candidate.setPowerToDetectNegativeStrandArtifact(strandArtifactPowerCalculator .cachingPowerCalculation(forwardPileup.depthOfCoverage(), candidate.getTumorF())); candidate.setStrandContingencyTable(SequenceUtils.getStrandContingencyTable(forwardPileup, reversePileup, (byte) upRef, (byte) altAllele)); ArrayList<PileupElement> mutantPileupElements = new ArrayList<PileupElement>(); ArrayList<PileupElement> referencePileupElements = new ArrayList<PileupElement>(); for (PileupElement p : t2.finalPileup) { final SAMRecord read = p.getRead(); final int offset = p.getOffset(); if (read.getReadString().charAt(offset) == altAllele) { mutantPileupElements.add(p); } else if (read.getReadString().charAt(offset) == upRef) { referencePileupElements.add(p); } else { // just drop the read... } } ReadBackedPileup mutantPileup = new ReadBackedPileupImpl(rawContext.getLocation(), mutantPileupElements); ReadBackedPileup referencePileup = new ReadBackedPileupImpl(rawContext.getLocation(), referencePileupElements); // TODO: shouldn't this be refAllele here? final LocusReadPile mutantPile = new LocusReadPile(mutantPileup, altAllele, 0, 0, MTAC.ENABLE_QSCORE_OUTPUT); final LocusReadPile refPile = new LocusReadPile(referencePileup, altAllele, 0, 0, MTAC.ENABLE_QSCORE_OUTPUT); // Set the maximum observed mapping quality score for the reference and alternate alleles int[] rmq = referencePileup.getMappingQuals(); candidate.setTumorRefMaxMapQ((rmq.length == 0) ? 0 : NumberUtils.max(rmq)); int[] amq = mutantPileup.getMappingQuals(); candidate.setTumorAltMaxMapQ((amq.length == 0) ? 
0 : NumberUtils.max(amq)); // start with just the tumor pile candidate.setTumorAltForwardOffsetsInRead(SequenceUtils.getForwardOffsetsInRead(mutantPileup)); candidate.setTumorAltReverseOffsetsInRead(SequenceUtils.getReverseOffsetsInRead(mutantPileup)); if (candidate.getTumorAltForwardOffsetsInRead().size() > 0) { double[] offsets = MuTectStats .convertIntegersToDoubles(candidate.getTumorAltForwardOffsetsInRead()); double median = MuTectStats.getMedian(offsets); candidate.setTumorForwardOffsetsInReadMedian(median); candidate.setTumorForwardOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median)); } if (candidate.getTumorAltReverseOffsetsInRead().size() > 0) { double[] offsets = MuTectStats .convertIntegersToDoubles(candidate.getTumorAltReverseOffsetsInRead()); double median = MuTectStats.getMedian(offsets); candidate.setTumorReverseOffsetsInReadMedian(median); candidate.setTumorReverseOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median)); } // test to see if the candidate should be rejected performRejection(candidate); if (MTAC.FORCE_ALLELES) { out.println(callStatsGenerator.generateCallStats(candidate)); } else { messageByTumorLod.put(candidate.getInitialTumorLod(), candidate); } } // if more than one site passes the tumor lod threshold for KEEP the fail the tri_allelic Site filter int passingCandidates = 0; for (CandidateMutation c : messageByTumorLod.values()) { if (c.getTumorLodFStar() >= MTAC.TUMOR_LOD_THRESHOLD) { passingCandidates++; } } if (passingCandidates > 1) { for (CandidateMutation c : messageByTumorLod.values()) { c.addRejectionReason("triallelic_site"); } } // write out the call stats for the "best" candidate if (!messageByTumorLod.isEmpty()) { CandidateMutation m = messageByTumorLod.lastEntry().getValue(); // only output passing calls OR rejected sites if ONLY_PASSING_CALLS is not specified if (!m.isRejected() || (m.isRejected() && !MTAC.ONLY_PASSING_CALLS)) { out.println(callStatsGenerator.generateCallStats(m)); if (vcf != null) { vcf.add(VCFGenerator.generateVC(m)); } } } return -1; } catch (Throwable t) { System.err.println("Error processing " + rawContext.getContig() + ":" + rawContext.getPosition()); t.printStackTrace(System.err); throw new RuntimeException(t); } }