Usage examples for java.util.TreeMap.values()
public Collection<V> values()
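TreeMap.values() returns a Collection view of the map's values; its iterator yields the values in ascending order of the corresponding keys, and removing an element through the view removes the mapping from the map (adding is not supported). A minimal, self-contained sketch (the map contents are made up for illustration):

import java.util.Collection;
import java.util.TreeMap;

public class TreeMapValuesExample {
    public static void main(String[] args) {
        TreeMap<Integer, String> map = new TreeMap<>();
        map.put(3, "three");
        map.put(1, "one");
        map.put(2, "two");

        // Values come back in ascending key order: one, two, three
        Collection<String> values = map.values();
        for (String v : values) {
            System.out.println(v);
        }

        // The view is backed by the map; removing through it removes the entry.
        values.remove("two");
        System.out.println(map.containsKey(2)); // false
    }
}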
From source file:org.ireland.jnetty.http.HttpServletRequestImpl.java
/**
 * Parse accept-language header value.
 */
protected void parseLocalesHeader(String value) {
    if (parser == null)
        parser = new StringParser();

    // Store the accumulated languages that have been requested in
    // a local collection, sorted by the quality value (so we can
    // add Locales in descending order). The values will be ArrayLists
    // containing the corresponding Locales to be added
    TreeMap<Double, ArrayList<Locale>> locales = new TreeMap<Double, ArrayList<Locale>>();

    // Preprocess the value to remove all whitespace
    int white = value.indexOf(' ');
    if (white < 0) {
        white = value.indexOf('\t');
    }
    if (white >= 0) {
        StringBuilder sb = new StringBuilder();
        int len = value.length();
        for (int i = 0; i < len; i++) {
            char ch = value.charAt(i);
            if ((ch != ' ') && (ch != '\t')) {
                sb.append(ch);
            }
        }
        parser.setString(sb.toString());
    } else {
        parser.setString(value);
    }

    // Process each comma-delimited language specification
    int length = parser.getLength();
    while (true) {

        // Extract the next comma-delimited entry
        int start = parser.getIndex();
        if (start >= length) {
            break;
        }
        int end = parser.findChar(',');
        String entry = parser.extract(start, end).trim();
        parser.advance(); // For the following entry

        // Extract the quality factor for this entry
        double quality = 1.0;
        int semi = entry.indexOf(";q=");
        if (semi >= 0) {
            try {
                String strQuality = entry.substring(semi + 3);
                if (strQuality.length() <= 5) {
                    quality = Double.parseDouble(strQuality);
                } else {
                    quality = 0.0;
                }
            } catch (NumberFormatException e) {
                quality = 0.0;
            }
            entry = entry.substring(0, semi);
        }

        // Skip entries we are not going to keep track of
        if (quality < 0.00005) {
            continue; // Zero (or effectively zero) quality factors
        }
        if ("*".equals(entry)) {
            continue; // FIXME - "*" entries are not handled
        }

        // Extract the language and country for this entry
        String language = null;
        String country = null;
        String variant = null;
        int dash = entry.indexOf('-');
        if (dash < 0) {
            language = entry;
            country = "";
            variant = "";
        } else {
            language = entry.substring(0, dash);
            country = entry.substring(dash + 1);
            int vDash = country.indexOf('-');
            if (vDash > 0) {
                String cTemp = country.substring(0, vDash);
                variant = country.substring(vDash + 1);
                country = cTemp;
            } else {
                variant = "";
            }
        }
        if (!isAlpha(language) || !isAlpha(country) || !isAlpha(variant)) {
            continue;
        }

        // Add a new Locale to the list of Locales for this quality level
        Locale locale = new Locale(language, country, variant);
        Double key = new Double(-quality); // Reverse the order
        ArrayList<Locale> values = locales.get(key);
        if (values == null) {
            values = new ArrayList<Locale>();
            locales.put(key, values);
        }
        values.add(locale);
    }

    // Process the quality values in highest->lowest order (due to
    // negating the Double value when creating the key)
    for (ArrayList<Locale> list : locales.values()) {
        for (Locale locale : list) {
            addLocale(locale);
        }
    }
}
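The TreeMap.values() idiom in this example: quality factors are negated before being used as keys, so iterating locales.values() visits the Locale lists from highest to lowest quality. A stripped-down sketch of just that grouping pattern, with made-up input and a local addLocale helper standing in for the servlet code:

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.TreeMap;

public class LocaleQualityOrder {
    public static void main(String[] args) {
        // Negate the quality so ascending key order becomes descending quality order.
        TreeMap<Double, List<Locale>> locales = new TreeMap<>();
        addLocale(locales, Locale.FRENCH, 0.8);
        addLocale(locales, Locale.GERMAN, 1.0);
        addLocale(locales, Locale.ITALIAN, 0.8);

        // values() walks the lists from highest to lowest quality: de, then fr and it.
        for (List<Locale> list : locales.values()) {
            for (Locale locale : list) {
                System.out.println(locale);
            }
        }
    }

    private static void addLocale(TreeMap<Double, List<Locale>> map, Locale locale, double quality) {
        map.computeIfAbsent(-quality, k -> new ArrayList<>()).add(locale);
    }
}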
From source file:com.dell.asm.asmcore.asmmanager.app.rest.ServiceTemplateService.java
private void applyRaidCustomizationFromProfile(ServiceTemplateSetting setting,
        TreeMap<String, Component> controllers) {
    TemplateRaidConfiguration raidConfiguration = new TemplateRaidConfiguration();
    raidConfiguration.setRaidtype(TemplateRaidConfiguration.RaidTypeUI.basic);
    raidConfiguration.setBasicraidlevel(VirtualDiskConfiguration.UIRaidLevel.raid0);
    raidConfiguration.setEnableglobalhotspares(false);

    String controllerFQDD = null;
    for (Component controllerComponent : controllers.values()) {
        // for each controller lookup for virtual disks
        for (Component subComponent : controllerComponent.getComponent()) {
            if (subComponent.getFQDD().toLowerCase().startsWith("disk.virtual.")) {
                raidConfiguration.setRaidtype(TemplateRaidConfiguration.RaidTypeUI.advanced);
                controllerFQDD = controllerComponent.getFQDD();
                String virtualDiskName = subComponent.getFQDD();

                VirtualDiskConfiguration vd = new VirtualDiskConfiguration();
                vd.setComparator(VirtualDiskConfiguration.ComparatorValue.exact);
                vd.setId(virtualDiskName);
                vd.setDisktype(VirtualDiskConfiguration.DiskMediaType.any);
                raidConfiguration.getVirtualdisks().add(vd);

                int numOfDisks = 0;
                for (Attribute attribute : subComponent.getAttribute()) {
                    if (attribute.getName().equals("RAIDTypes")) {
                        vd.setRaidlevel(
                                VirtualDiskConfiguration.UIRaidLevel.fromConfigValue(attribute.getValue()));
                    } else if (attribute.getName().equals("IncludedPhysicalDiskID")) {
                        numOfDisks++;
                    }
                }
                vd.setNumberofdisks(numOfDisks);
            } else if (subComponent.getFQDD().toLowerCase().startsWith("enclosure.internal.")) {
                int numOfHotspares = 0;
                for (Component physicalDiskComponent : subComponent.getComponent()) {
                    for (Attribute attribute : physicalDiskComponent.getAttribute()) {
                        if (attribute.getName().equals("RAIDHotSpareStatus")) {
                            // No, Dedicated, Global
                            if (attribute.getValue().contains("Global")) {
                                raidConfiguration.setEnableglobalhotspares(true);
                                numOfHotspares++;
                            }
                        }
                    }
                }
                raidConfiguration.setGlobalhotspares(numOfHotspares);
            }
        }
        if (controllerFQDD != null)
            break; // process only the first found controller with virtual disks
    }

    ObjectMapper objectMapper = new ObjectMapper();
    try {
        setting.setValue(objectMapper.writeValueAsString(raidConfiguration));
    } catch (JsonProcessingException e) {
        throw new AsmManagerInternalErrorException("applyRaidCustomization", "ServiceTemplateService", e);
    }
}
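Here controllers.values() drives a deterministic first-match scan: controllers are keyed by FQDD, so they are visited in sorted key order and the loop stops at the first one that has virtual disks. A minimal sketch of the pattern, with a plain String-to-List map standing in for the Component model:

import java.util.List;
import java.util.TreeMap;

public class FirstNonEmptyValue {
    public static void main(String[] args) {
        // Controller FQDD -> its virtual disk FQDDs (hypothetical values)
        TreeMap<String, List<String>> controllers = new TreeMap<>();
        controllers.put("RAID.Slot.2-1", List.of());
        controllers.put("RAID.Integrated.1-1", List.of("Disk.Virtual.0:RAID.Integrated.1-1"));

        // values() iterates in ascending key order, so the scan is deterministic
        // regardless of insertion order.
        List<String> firstDisks = null;
        for (List<String> disks : controllers.values()) {
            if (!disks.isEmpty()) {
                firstDisks = disks;
                break; // process only the first controller that has virtual disks
            }
        }
        System.out.println(firstDisks); // [Disk.Virtual.0:RAID.Integrated.1-1]
    }
}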
From source file:com.netxforge.oss2.config.AmiPeerFactory.java
/**
 * Combine specific and range elements so that AMIPeerFactory has to spend
 * less time iterating all these elements.
 *
 * TODO This really should be pulled up into PeerFactory somehow, but I'm not sure how (given that
 * "Definition" is different for both SNMP and AMI). Maybe some sort of visitor methodology would work.
 * The basic logic should be fine as it's all IP address manipulation.
 *
 * @throws UnknownHostException
 */
void optimize() throws UnknownHostException {
    getWriteLock().lock();

    try {
        // First pass: Remove empty definition elements
        for (final Iterator<Definition> definitionsIterator = m_config.getDefinitionCollection()
                .iterator(); definitionsIterator.hasNext();) {
            final Definition definition = definitionsIterator.next();
            if (definition.getSpecificCount() == 0 && definition.getRangeCount() == 0) {
                LogUtils.debugf(this, "optimize: Removing empty definition element");
                definitionsIterator.remove();
            }
        }

        // Second pass: Replace single IP range elements with specific elements
        for (Definition definition : m_config.getDefinitionCollection()) {
            for (Iterator<Range> rangesIterator = definition.getRangeCollection().iterator();
                    rangesIterator.hasNext();) {
                Range range = rangesIterator.next();
                if (range.getBegin().equals(range.getEnd())) {
                    definition.addSpecific(range.getBegin());
                    rangesIterator.remove();
                }
            }
        }

        // Third pass: Sort specific and range elements for improved XML
        // readability and then combine them into fewer elements where possible
        for (final Definition definition : m_config.getDefinitionCollection()) {
            // Sort specifics
            final TreeMap<InetAddress, String> specificsMap = new TreeMap<InetAddress, String>(
                    new InetAddressComparator());
            for (final String specific : definition.getSpecificCollection()) {
                specificsMap.put(InetAddressUtils.getInetAddress(specific), specific.trim());
            }

            // Sort ranges
            final TreeMap<InetAddress, Range> rangesMap = new TreeMap<InetAddress, Range>(
                    new InetAddressComparator());
            for (final Range range : definition.getRangeCollection()) {
                rangesMap.put(InetAddressUtils.getInetAddress(range.getBegin()), range);
            }

            // Combine consecutive specifics into ranges
            InetAddress priorSpecific = null;
            Range addedRange = null;
            for (final InetAddress specific : specificsMap.keySet()) {
                if (priorSpecific == null) {
                    priorSpecific = specific;
                    continue;
                }

                if (BigInteger.ONE.equals(InetAddressUtils.difference(specific, priorSpecific))
                        && InetAddressUtils.inSameScope(specific, priorSpecific)) {
                    if (addedRange == null) {
                        addedRange = new Range();
                        addedRange.setBegin(InetAddressUtils.toIpAddrString(priorSpecific));
                        rangesMap.put(priorSpecific, addedRange);
                        specificsMap.remove(priorSpecific);
                    }

                    addedRange.setEnd(InetAddressUtils.toIpAddrString(specific));
                    specificsMap.remove(specific);
                } else {
                    addedRange = null;
                }

                priorSpecific = specific;
            }

            // Move specifics to ranges
            for (final InetAddress specific : new ArrayList<InetAddress>(specificsMap.keySet())) {
                for (final InetAddress begin : new ArrayList<InetAddress>(rangesMap.keySet())) {
                    if (!InetAddressUtils.inSameScope(begin, specific)) {
                        continue;
                    }

                    if (InetAddressUtils.toInteger(begin).subtract(BigInteger.ONE)
                            .compareTo(InetAddressUtils.toInteger(specific)) > 0) {
                        continue;
                    }

                    final Range range = rangesMap.get(begin);
                    final InetAddress end = InetAddressUtils.getInetAddress(range.getEnd());

                    if (InetAddressUtils.toInteger(end).add(BigInteger.ONE)
                            .compareTo(InetAddressUtils.toInteger(specific)) < 0) {
                        continue;
                    }

                    if (InetAddressUtils.toInteger(specific).compareTo(InetAddressUtils.toInteger(begin)) >= 0
                            && InetAddressUtils.toInteger(specific)
                                    .compareTo(InetAddressUtils.toInteger(end)) <= 0) {
                        specificsMap.remove(specific);
                        break;
                    }

                    if (InetAddressUtils.toInteger(begin).subtract(BigInteger.ONE)
                            .equals(InetAddressUtils.toInteger(specific))) {
                        rangesMap.remove(begin);
                        rangesMap.put(specific, range);
                        range.setBegin(InetAddressUtils.toIpAddrString(specific));
                        specificsMap.remove(specific);
                        break;
                    }

                    if (InetAddressUtils.toInteger(end).add(BigInteger.ONE)
                            .equals(InetAddressUtils.toInteger(specific))) {
                        range.setEnd(InetAddressUtils.toIpAddrString(specific));
                        specificsMap.remove(specific);
                        break;
                    }
                }
            }

            // Combine consecutive ranges
            Range priorRange = null;
            InetAddress priorBegin = null;
            InetAddress priorEnd = null;
            for (final Iterator<InetAddress> rangesIterator = rangesMap.keySet().iterator();
                    rangesIterator.hasNext();) {
                final InetAddress beginAddress = rangesIterator.next();
                final Range range = rangesMap.get(beginAddress);
                final InetAddress endAddress = InetAddressUtils.getInetAddress(range.getEnd());

                if (priorRange != null) {
                    if (InetAddressUtils.inSameScope(beginAddress, priorEnd)
                            && InetAddressUtils.difference(beginAddress, priorEnd)
                                    .compareTo(BigInteger.ONE) <= 0) {
                        priorBegin = new InetAddressComparator().compare(priorBegin, beginAddress) < 0
                                ? priorBegin : beginAddress;
                        priorRange.setBegin(InetAddressUtils.toIpAddrString(priorBegin));
                        priorEnd = new InetAddressComparator().compare(priorEnd, endAddress) > 0
                                ? priorEnd : endAddress;
                        priorRange.setEnd(InetAddressUtils.toIpAddrString(priorEnd));

                        rangesIterator.remove();
                        continue;
                    }
                }

                priorRange = range;
                priorBegin = beginAddress;
                priorEnd = endAddress;
            }

            // Update changes made to sorted maps
            definition.setSpecific(specificsMap.values().toArray(new String[0]));
            definition.setRange(rangesMap.values().toArray(new Range[0]));
        }
    } finally {
        getWriteLock().unlock();
    }
}
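The last step above exports the sorted maps with specificsMap.values().toArray(new String[0]) and rangesMap.values().toArray(new Range[0]); because both maps were built with an InetAddressComparator, values() already hands the elements back in address order. A minimal sketch of sorting through a TreeMap with a custom comparator and exporting its values to an array, using a stock String comparator as a stand-in:

import java.util.TreeMap;

public class SortedValuesToArray {
    public static void main(String[] args) {
        // A custom comparator supplies the ordering (stand-in for the
        // InetAddressComparator used in the example above).
        TreeMap<String, String> byName = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        byName.put("delta", "delta");
        byName.put("Alpha", "Alpha");
        byName.put("charlie", "charlie");

        // values() follows the comparator's order, so the exported array is already sorted.
        String[] sorted = byName.values().toArray(new String[0]);
        for (String s : sorted) {
            System.out.println(s); // Alpha, charlie, delta
        }
    }
}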
From source file:org.apache.hadoop.hive.metastore.MetaStoreDirectSql.java
/** Should be called with the list short enough to not trip up Oracle/etc. */ private List<Partition> getPartitionsFromPartitionIds(String dbName, String tblName, Boolean isView, List<Object> partIdList) throws MetaException { boolean doTrace = LOG.isDebugEnabled(); int idStringWidth = (int) Math.ceil(Math.log10(partIdList.size())) + 1; // 1 for comma int sbCapacity = partIdList.size() * idStringWidth; // Prepare StringBuilder for "PART_ID in (...)" to use in future queries. StringBuilder partSb = new StringBuilder(sbCapacity); for (Object partitionId : partIdList) { partSb.append(extractSqlLong(partitionId)).append(","); }/*w ww . j a v a 2 s .c om*/ String partIds = trimCommaList(partSb); // Get most of the fields for the IDs provided. // Assume db and table names are the same for all partition, as provided in arguments. String queryText = "select " + PARTITIONS + ".\"PART_ID\", " + SDS + ".\"SD_ID\", " + SDS + ".\"CD_ID\"," + " " + SERDES + ".\"SERDE_ID\", " + PARTITIONS + ".\"CREATE_TIME\"," + " " + PARTITIONS + ".\"LAST_ACCESS_TIME\", " + SDS + ".\"INPUT_FORMAT\", " + SDS + ".\"IS_COMPRESSED\"," + " " + SDS + ".\"IS_STOREDASSUBDIRECTORIES\", " + SDS + ".\"LOCATION\", " + SDS + ".\"NUM_BUCKETS\"," + " " + SDS + ".\"OUTPUT_FORMAT\", " + SERDES + ".\"NAME\", " + SERDES + ".\"SLIB\" " + "from " + PARTITIONS + "" + " left outer join " + SDS + " on " + PARTITIONS + ".\"SD_ID\" = " + SDS + ".\"SD_ID\" " + " left outer join " + SERDES + " on " + SDS + ".\"SERDE_ID\" = " + SERDES + ".\"SERDE_ID\" " + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc"; long start = doTrace ? System.nanoTime() : 0; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); List<Object[]> sqlResult = executeWithArray(query, null, queryText); long queryTime = doTrace ? System.nanoTime() : 0; Deadline.checkTimeout(); // Read all the fields and create partitions, SDs and serdes. TreeMap<Long, Partition> partitions = new TreeMap<Long, Partition>(); TreeMap<Long, StorageDescriptor> sds = new TreeMap<Long, StorageDescriptor>(); TreeMap<Long, SerDeInfo> serdes = new TreeMap<Long, SerDeInfo>(); TreeMap<Long, List<FieldSchema>> colss = new TreeMap<Long, List<FieldSchema>>(); // Keep order by name, consistent with JDO. ArrayList<Partition> orderedResult = new ArrayList<Partition>(partIdList.size()); // Prepare StringBuilder-s for "in (...)" lists to use in one-to-many queries. StringBuilder sdSb = new StringBuilder(sbCapacity), serdeSb = new StringBuilder(sbCapacity); StringBuilder colsSb = new StringBuilder(7); // We expect that there's only one field schema. tblName = tblName.toLowerCase(); dbName = dbName.toLowerCase(); for (Object[] fields : sqlResult) { // Here comes the ugly part... long partitionId = extractSqlLong(fields[0]); Long sdId = extractSqlLong(fields[1]); Long colId = extractSqlLong(fields[2]); Long serdeId = extractSqlLong(fields[3]); // A partition must have at least sdId and serdeId set, or nothing set if it's a view. if (sdId == null || serdeId == null) { if (isView == null) { isView = isViewTable(dbName, tblName); } if ((sdId != null || colId != null || serdeId != null) || !isView) { throw new MetaException("Unexpected null for one of the IDs, SD " + sdId + ", serde " + serdeId + " for a " + (isView ? "" : "non-") + " view"); } } Partition part = new Partition(); orderedResult.add(part); // Set the collection fields; some code might not check presence before accessing them. 
part.setParameters(new HashMap<String, String>()); part.setValues(new ArrayList<String>()); part.setDbName(dbName); part.setTableName(tblName); if (fields[4] != null) part.setCreateTime(extractSqlInt(fields[4])); if (fields[5] != null) part.setLastAccessTime(extractSqlInt(fields[5])); partitions.put(partitionId, part); if (sdId == null) continue; // Probably a view. assert serdeId != null; // We assume each partition has an unique SD. StorageDescriptor sd = new StorageDescriptor(); StorageDescriptor oldSd = sds.put(sdId, sd); if (oldSd != null) { throw new MetaException("Partitions reuse SDs; we don't expect that"); } // Set the collection fields; some code might not check presence before accessing them. sd.setSortCols(new ArrayList<Order>()); sd.setBucketCols(new ArrayList<String>()); sd.setParameters(new HashMap<String, String>()); sd.setSkewedInfo(new SkewedInfo(new ArrayList<String>(), new ArrayList<List<String>>(), new HashMap<List<String>, String>())); sd.setInputFormat((String) fields[6]); Boolean tmpBoolean = extractSqlBoolean(fields[7]); if (tmpBoolean != null) sd.setCompressed(tmpBoolean); tmpBoolean = extractSqlBoolean(fields[8]); if (tmpBoolean != null) sd.setStoredAsSubDirectories(tmpBoolean); sd.setLocation((String) fields[9]); if (fields[10] != null) sd.setNumBuckets(extractSqlInt(fields[10])); sd.setOutputFormat((String) fields[11]); sdSb.append(sdId).append(","); part.setSd(sd); if (colId != null) { List<FieldSchema> cols = colss.get(colId); // We expect that colId will be the same for all (or many) SDs. if (cols == null) { cols = new ArrayList<FieldSchema>(); colss.put(colId, cols); colsSb.append(colId).append(","); } sd.setCols(cols); } // We assume each SD has an unique serde. SerDeInfo serde = new SerDeInfo(); SerDeInfo oldSerde = serdes.put(serdeId, serde); if (oldSerde != null) { throw new MetaException("SDs reuse serdes; we don't expect that"); } serde.setParameters(new HashMap<String, String>()); serde.setName((String) fields[12]); serde.setSerializationLib((String) fields[13]); serdeSb.append(serdeId).append(","); sd.setSerdeInfo(serde); Deadline.checkTimeout(); } query.closeAll(); timingTrace(doTrace, queryText, start, queryTime); // Now get all the one-to-many things. Start with partitions. queryText = "select \"PART_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from " + PARTITION_PARAMS + "" + " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null" + " order by \"PART_ID\" asc"; loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() { @Override public void apply(Partition t, Object[] fields) { t.putToParameters((String) fields[1], (String) fields[2]); } }); // Perform conversion of null map values for (Partition t : partitions.values()) { t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } queryText = "select \"PART_ID\", \"PART_KEY_VAL\" from " + PARTITION_KEY_VALS + "" + " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc"; loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() { @Override public void apply(Partition t, Object[] fields) { t.addToValues((String) fields[1]); } }); // Prepare IN (blah) lists for the following queries. Cut off the final ','s. if (sdSb.length() == 0) { assert serdeSb.length() == 0 && colsSb.length() == 0; return orderedResult; // No SDs, probably a view. 
} String sdIds = trimCommaList(sdSb); String serdeIds = trimCommaList(serdeSb); String colIds = trimCommaList(colsSb); // Get all the stuff for SD. Don't do empty-list check - we expect partitions do have SDs. queryText = "select \"SD_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from " + SD_PARAMS + "" + " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null" + " order by \"SD_ID\" asc"; loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() { @Override public void apply(StorageDescriptor t, Object[] fields) { t.putToParameters((String) fields[1], extractSqlClob(fields[2])); } }); // Perform conversion of null map values for (StorageDescriptor t : sds.values()) { t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } queryText = "select \"SD_ID\", \"COLUMN_NAME\", " + SORT_COLS + ".\"ORDER\"" + " from " + SORT_COLS + "" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc"; loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() { @Override public void apply(StorageDescriptor t, Object[] fields) { if (fields[2] == null) return; t.addToSortCols(new Order((String) fields[1], extractSqlInt(fields[2]))); } }); queryText = "select \"SD_ID\", \"BUCKET_COL_NAME\" from " + BUCKETING_COLS + "" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc"; loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() { @Override public void apply(StorageDescriptor t, Object[] fields) { t.addToBucketCols((String) fields[1]); } }); // Skewed columns stuff. queryText = "select \"SD_ID\", \"SKEWED_COL_NAME\" from " + SKEWED_COL_NAMES + "" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc"; boolean hasSkewedColumns = loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() { @Override public void apply(StorageDescriptor t, Object[] fields) { if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo()); t.getSkewedInfo().addToSkewedColNames((String) fields[1]); } }) > 0; // Assume we don't need to fetch the rest of the skewed column data if we have no columns. if (hasSkewedColumns) { // We are skipping the SKEWED_STRING_LIST table here, as it seems to be totally useless. queryText = "select " + SKEWED_VALUES + ".\"SD_ID_OID\"," + " " + SKEWED_STRING_LIST_VALUES + ".\"STRING_LIST_ID\"," + " " + SKEWED_STRING_LIST_VALUES + ".\"STRING_LIST_VALUE\" " + "from " + SKEWED_VALUES + " " + " left outer join " + SKEWED_STRING_LIST_VALUES + " on " + SKEWED_VALUES + "." + "\"STRING_LIST_ID_EID\" = " + SKEWED_STRING_LIST_VALUES + ".\"STRING_LIST_ID\" " + "where " + SKEWED_VALUES + ".\"SD_ID_OID\" in (" + sdIds + ") " + " and " + SKEWED_VALUES + ".\"STRING_LIST_ID_EID\" is not null " + " and " + SKEWED_VALUES + ".\"INTEGER_IDX\" >= 0 " + "order by " + SKEWED_VALUES + ".\"SD_ID_OID\" asc, " + SKEWED_VALUES + ".\"INTEGER_IDX\" asc," + " " + SKEWED_STRING_LIST_VALUES + ".\"INTEGER_IDX\" asc"; loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() { private Long currentListId; private List<String> currentList; @Override public void apply(StorageDescriptor t, Object[] fields) throws MetaException { if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo()); // Note that this is not a typical list accumulator - there's no call to finalize // the last list. 
Instead we add list to SD first, as well as locally to add elements. if (fields[1] == null) { currentList = null; // left outer join produced a list with no values currentListId = null; t.getSkewedInfo().addToSkewedColValues(Collections.<String>emptyList()); } else { long fieldsListId = extractSqlLong(fields[1]); if (currentListId == null || fieldsListId != currentListId) { currentList = new ArrayList<String>(); currentListId = fieldsListId; t.getSkewedInfo().addToSkewedColValues(currentList); } currentList.add((String) fields[2]); } } }); // We are skipping the SKEWED_STRING_LIST table here, as it seems to be totally useless. queryText = "select " + SKEWED_COL_VALUE_LOC_MAP + ".\"SD_ID\"," + " " + SKEWED_STRING_LIST_VALUES + ".STRING_LIST_ID," + " " + SKEWED_COL_VALUE_LOC_MAP + ".\"LOCATION\"," + " " + SKEWED_STRING_LIST_VALUES + ".\"STRING_LIST_VALUE\" " + "from " + SKEWED_COL_VALUE_LOC_MAP + "" + " left outer join " + SKEWED_STRING_LIST_VALUES + " on " + SKEWED_COL_VALUE_LOC_MAP + "." + "\"STRING_LIST_ID_KID\" = " + SKEWED_STRING_LIST_VALUES + ".\"STRING_LIST_ID\" " + "where " + SKEWED_COL_VALUE_LOC_MAP + ".\"SD_ID\" in (" + sdIds + ")" + " and " + SKEWED_COL_VALUE_LOC_MAP + ".\"STRING_LIST_ID_KID\" is not null " + "order by " + SKEWED_COL_VALUE_LOC_MAP + ".\"SD_ID\" asc," + " " + SKEWED_STRING_LIST_VALUES + ".\"STRING_LIST_ID\" asc," + " " + SKEWED_STRING_LIST_VALUES + ".\"INTEGER_IDX\" asc"; loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() { private Long currentListId; private List<String> currentList; @Override public void apply(StorageDescriptor t, Object[] fields) throws MetaException { if (!t.isSetSkewedInfo()) { SkewedInfo skewedInfo = new SkewedInfo(); skewedInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>()); t.setSkewedInfo(skewedInfo); } Map<List<String>, String> skewMap = t.getSkewedInfo().getSkewedColValueLocationMaps(); // Note that this is not a typical list accumulator - there's no call to finalize // the last list. Instead we add list to SD first, as well as locally to add elements. if (fields[1] == null) { currentList = new ArrayList<String>(); // left outer join produced a list with no values currentListId = null; } else { long fieldsListId = extractSqlLong(fields[1]); if (currentListId == null || fieldsListId != currentListId) { currentList = new ArrayList<String>(); currentListId = fieldsListId; } else { skewMap.remove(currentList); // value based compare.. remove first } currentList.add((String) fields[3]); } skewMap.put(currentList, (String) fields[2]); } }); } // if (hasSkewedColumns) // Get FieldSchema stuff if any. if (!colss.isEmpty()) { // We are skipping the CDS table here, as it seems to be totally useless. queryText = "select \"CD_ID\", \"COMMENT\", \"COLUMN_NAME\", \"TYPE_NAME\"" + " from " + COLUMNS_V2 + " where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc"; loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() { @Override public void apply(List<FieldSchema> t, Object[] fields) { t.add(new FieldSchema((String) fields[2], extractSqlClob(fields[3]), (String) fields[1])); } }); } // Finally, get all the stuff for serdes - just the params. 
queryText = "select \"SERDE_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from " + SERDE_PARAMS + "" + " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null" + " order by \"SERDE_ID\" asc"; loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() { @Override public void apply(SerDeInfo t, Object[] fields) { t.putToParameters((String) fields[1], extractSqlClob(fields[2])); } }); // Perform conversion of null map values for (SerDeInfo t : serdes.values()) { t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } return orderedResult; }
From source file:org.opensextant.extractors.geo.GazetteerMatcher.java
/** * Geotag a document, returning PlaceCandidates for the mentions in * document. Optionally just return the PlaceCandidates with name only and * no Place objects attached. Names of contients are passed back as matches, * with geo matches. Continents are filtered out by default. * * @param buffer/*from w ww .j av a 2 s . c o m*/ * text * @param docid * identity of the text * @param tagOnly * True if you wish to get the matched phrases only. False if you * want the full list of Place Candidates. * @param fld * gazetteer field to use for tagging * @return place_candidates List of place candidates * @throws ExtractionException * on err */ public List<PlaceCandidate> tagText(String buffer, String docid, boolean tagOnly, String fld) throws ExtractionException { // "tagsCount":10, "tags":[{ "ids":[35], "endOffset":40, // "startOffset":38}, // { "ids":[750308, 2769912, 2770041, 10413973, 10417546], // "endOffset":49, // "startOffset":41}, // ... // "matchingDocs":{"numFound":75, "start":0, "docs":[ { // "place_id":"USGS1992921", "name":"Monterrey", "cc":"PR"}, { // "place_id":"USGS1991763", "name":"Monterrey", "cc":"PR"}, ] // Reset counts. this.defaultFilterCount = 0; this.userFilterCount = 0; // during post-processing tags we may have to distinguish between tagging/tokenizing // general vs. cjk vs. ar. But not yet though. // boolean useGeneralMode = DEFAULT_TAG_FIELD.equals(fld); long t0 = System.currentTimeMillis(); log.debug("TEXT SIZE = {}", buffer.length()); int[] textMetrics = TextUtils.measureCase(buffer); boolean isUpperCase = TextUtils.isUpperCaseDocument(textMetrics); params.set("field", fld); Map<Integer, Object> beanMap = new HashMap<Integer, Object>(100); QueryResponse response = tagTextCallSolrTagger(buffer, docid, beanMap); @SuppressWarnings("unchecked") List<NamedList<?>> tags = (List<NamedList<?>>) response.getResponse().get("tags"); this.tagNamesTime = response.getQTime(); long t1 = t0 + tagNamesTime; long t2 = System.currentTimeMillis(); boolean geocode = !tagOnly; /* * Retrieve all offsets into a long list. These offsets will report a * text span and all the gazetteer record IDs that are associated to * that span. The text could either be a name, a code or some other * abbreviation. * * For practical reasons the default behavior is to filter trivial spans * given the gazetteer data that is returned for them. * * WARNING: lots of optimizations occur here due to the potentially * large volume of tags and gazetteer data that is involved. And this is * relatively early in the pipline. */ log.debug("DOC={} TAGS SIZE={}", docid, tags.size()); TreeMap<Integer, PlaceCandidate> candidates = new TreeMap<Integer, PlaceCandidate>(); // names matched is used only for debugging, currently. Set<String> namesMatched = new HashSet<>(); tagLoop: for (NamedList<?> tag : tags) { int x1 = (Integer) tag.get("startOffset"); int x2 = (Integer) tag.get("endOffset"); int len = x2 - x1; if (len == 1) { // Ignoring place names whose length is less than 2 chars ++this.defaultFilterCount; continue; } // +1 char after last matched // Could have enabled the "matchText" option from the tagger to get // this, but since we already have the content as a String then // we might as well not make the tagger do any more work. String matchText = (String) tag.get("matchText"); // Get char immediately following match, for light NLP rules. char postChar = 0; if (x2 < buffer.length()) { postChar = buffer.charAt(x2); } // Then filter out trivial matches. E.g., Us is filtered out. vs. US would // be allowed. 
If lowercase abbreviations are allowed, then all matches are passed. if (len < 3) { if (TextUtils.isASCII(matchText) && !StringUtils.isAllUpperCase(matchText) && !allowLowercaseAbbrev) { ++this.defaultFilterCount; continue; } } if (TextUtils.countFormattingSpace(matchText) > 1) { // Phrases with words broken across more than one line are not // valid matches. // Phrase with a single TAB is okay ++this.defaultFilterCount; continue; } // Eliminate any newlines and extra whitespace in match matchText = TextUtils.squeeze_whitespace(matchText); /** * Filter out trivial tags. Due to normalization, we tend to get * lots of false positives that can be eliminated early. */ if (filter.filterOut(matchText)) { ++this.defaultFilterCount; continue; } PlaceCandidate pc = new PlaceCandidate(); pc.start = x1; pc.end = x2; pc.setText(matchText); /* * Filter out tags that user determined ahead of time as not-places * for their context. * */ if (userfilter != null) { if (userfilter.filterOut(pc.getTextnorm())) { log.debug("User Filter:{}", matchText); ++this.userFilterCount; continue; } } /* * Continent filter is needed, as many mentions of contients confuse * real geotagging/geocoding. * */ if (continents.filterOut(pc.getTextnorm())) { pc.isContinent = true; pc.setFilteredOut(true); candidates.put(pc.start, pc); continue; } /* * Found UPPER CASE text in a mixed-cased document. * Conservatively, this is likely an acronym or some heading. * But possibly still a valid place name. * HEURISTIC: acronyms are relatively short. * HEURISTIC: region codes can be acronyms and are valid places * * using such place candidates you may score short acronym matches lower than fully named ones. * when inferring boundaries (states, provinces, etc) */ if (!isUpperCase && pc.isUpper() && len < 5) { pc.isAcronym = true; } /* * Everything Else. */ pc.setSurroundingTokens(buffer); @SuppressWarnings("unchecked") List<Integer> placeRecordIds = (List<Integer>) tag.get("ids"); /* * This assertion is helpful in debugging: assert * placeRecordIds.size() == new * HashSet<Integer>(placeRecordIds).size() : "ids should be unique"; */ // assert!placeRecordIds.isEmpty(); namesMatched.clear(); //double maxNameBias = 0.0; for (Integer solrId : placeRecordIds) { // Yes, we must cast here. // As long as createTag generates the correct type stored in // beanMap we are fine. ScoredPlace pGeo = (ScoredPlace) beanMap.get(solrId); // assert pGeo != null; // Optimization: abbreviation filter. // // Do not add PlaceCandidates for lower case tokens that are // marked as Abbreviations, unless flagged to do so. // // DEFAULT behavior is to avoid lower case text that is tagged // as an abbreviation in gazetteer, // // Common terms: in, or, oh, me, us, we, etc. Are all not // typically place names or valid abbreviations in text. // if (!allowLowercaseAbbrev && pGeo.isAbbreviation() && pc.isLower()) { log.debug("Ignore lower case term={}", pc.getText()); // DWS: TODO what if there is another pGeo for this pc that // isn't an abbrev? Therefore shouldn't we continue this // loop and not tagLoop? continue tagLoop; } /* * If text match contains "." and it matches any abbreviation, * mark the candidate as an abbrev. TODO: Possibly best confirm * this by sentence detection, as well. However, this pertains * to text spans that contain "." within the bounds, and not * likely an ending. E.g., "U.S." or "U.S" are trivial examples; * "US" is more ambiguous, as we need to know if document is * upperCase. 
* * Any place abbreviation will trigger isAbbreviation = true * * "IF YOU FIND US HERE" the term 'US' is ambiguous here, so * it is not classified as an abbreviation. Otherwise if you have * "My organization YAK happens to coincide with a place named Yak. * But we first must determine if 'YAK' is a valid abbreviation for an actual place. * HEURISTIC: place abbreviations are relatively short, e.g. one word(len=7 or less) */ if (len < 8 && !pc.isAbbreviation) { assessAbbreviation(pc, pGeo, postChar, isUpperCase); } if (log.isDebugEnabled()) { namesMatched.add(pGeo.getName()); } /** * Country names are the only names you can reasonably set ahead * of time. All other names need to be assessed in context. * Negate country names, e.g., "Georgia", by exception. */ if (pGeo.isCountry()) { pc.isCountry = true; } if (geocode) { pGeo.defaultHierarchicalPath(); // Default score for geo will be calculated in PlaceCandidate pc.addPlace(pGeo); } } // If geocoding, skip this PlaceCandidate if has no places (e.g. due // to filtering) if (geocode && !pc.hasPlaces()) { log.debug("Place has no places={}", pc.getText()); continue; } else { if (log.isDebugEnabled()) { log.debug("Text {} matched {}", pc.getText(), namesMatched); } } candidates.put(pc.start, pc); } // for tag long t3 = System.currentTimeMillis(); // this.tagNamesTime = (int)(t1 - t0); this.getNamesTime = (int) (t2 - t1); this.totalTime = (int) (t3 - t0); if (log.isDebugEnabled()) { summarizeExtraction(candidates.values(), docid); } this.filteredTotal += this.defaultFilterCount + this.userFilterCount; this.matchedTotal += candidates.size(); return new ArrayList<PlaceCandidate>(candidates.values()); }
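The tagger keys each PlaceCandidate by its start offset, so candidates.values() yields the matches in document order and the method returns new ArrayList&lt;PlaceCandidate&gt;(candidates.values()). A minimal sketch of that offset-ordered collection pattern, with plain strings standing in for PlaceCandidate:

import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

public class OffsetOrderedMatches {
    public static void main(String[] args) {
        String text = "Flew from Monterrey to Paris via Boston.";

        // Key matches by start offset; a later match at the same offset would overwrite.
        TreeMap<Integer, String> candidates = new TreeMap<>();
        candidates.put(text.indexOf("Boston"), "Boston");
        candidates.put(text.indexOf("Monterrey"), "Monterrey");
        candidates.put(text.indexOf("Paris"), "Paris");

        // values() walks ascending offsets, i.e. left-to-right document order.
        List<String> ordered = new ArrayList<>(candidates.values());
        System.out.println(ordered); // [Monterrey, Paris, Boston]
    }
}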
From source file:lasige.steeldb.jdbc.BFTRowSet.java
/**
 * Converts this <code>CachedRowSetImpl</code> object to a collection
 * of tables. The sample implementation utilizes the <code>TreeMap</code>
 * collection type.
 * This class guarantees that the map will be in ascending key order,
 * sorted according to the natural order for the key's class.
 *
 * @return a <code>Collection</code> object consisting of tables,
 *         each of which is a copy of a row in this
 *         <code>CachedRowSetImpl</code> object
 * @throws SQLException if an error occurs in generating the collection
 * @see #toCollection(int)
 * @see #toCollection(String)
 * @see java.util.TreeMap
 */
public Collection<?> toCollection() throws SQLException {
    TreeMap<Integer, BaseRow> tMap;
    tMap = new TreeMap<Integer, BaseRow>();

    for (int i = 0; i < numRows; i++) {
        tMap.put(new Integer(i), rvh.get(i));
    }

    return (tMap.values());
}
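toCollection() returns tMap.values() directly: rows are keyed by their index, so the returned Collection iterates them in ascending row order. A small self-contained sketch of the same idea, with strings standing in for BaseRow:

import java.util.Collection;
import java.util.List;
import java.util.TreeMap;

public class RowsToCollection {
    public static void main(String[] args) {
        List<String> rows = List.of("row-A", "row-B", "row-C");

        TreeMap<Integer, String> tMap = new TreeMap<>();
        for (int i = 0; i < rows.size(); i++) {
            tMap.put(i, rows.get(i));
        }

        // The value view preserves ascending key (row index) order.
        Collection<String> collection = tMap.values();
        System.out.println(collection); // [row-A, row-B, row-C]
    }
}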
From source file:org.ncic.bioinfo.sparkseq.algorithms.walker.mutect.Mutect.java
@Override protected void map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext rawContext) { final char upRef = Character.toUpperCase((char) ref.getBase()); if (upRef != 'A' && upRef != 'C' && upRef != 'G' && upRef != 'T') { return;//w ww . j a v a2 s. co m } ReadBackedPileup tumorPileup = cleanNoneRefPileupElement(rawContext.getBasePileup()); ReadBackedPileup normalPileup = cleanNoneRefPileupElement(normalSamTraverser.next().getBasePileup()); // an optimization to speed things up when there is no coverage if (tumorPileup.depthOfCoverage() == 0 && normalPileup.depthOfCoverage() == 0) { return; } TreeMap<Double, CandidateMutation> messageByTumorLod = new TreeMap<Double, CandidateMutation>(); // get sequence context around mutation String sequenceContext = SequenceUtils.createSequenceContext(this.refContentProvider, ref, 3); try { final LocusReadPile tumorReadPile = new LocusReadPile(tumorPileup, upRef, MTAC.MIN_QSCORE, MIN_QSUM_QSCORE, false, MTAC.ARTIFACT_DETECTION_MODE, MTAC.ENABLE_QSCORE_OUTPUT); final LocusReadPile normalReadPile = new LocusReadPile(normalPileup, upRef, MTAC.MIN_QSCORE, 0, this.USE_MAPQ0_IN_NORMAL_QSCORE, true, MTAC.ENABLE_QSCORE_OUTPUT); Collection<VariantContext> panelOfNormalsVC = tracker.getValues(normalPanelRod, rawContext.getLocation()); Collection<VariantContext> cosmicVC = getVCInTrackerInLocus(RODNames.COSMIC, tracker); Collection<VariantContext> dbsnpVC = getVCInTrackerInLocus(RODNames.DBSNP, tracker); // remove the effect of cosmic from dbSNP boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty()); // compute coverage flags int tumorCoveredDepthThreshold = 14; int normalCoveredDepthThreshold = (germlineAtRisk) ? 19 : 8; if (!hasNormalBam) { normalCoveredDepthThreshold = 0; } int tumorBaseCount = tumorReadPile.finalPileupReads.size(); int normalBaseCount = normalReadPile.finalPileupReads.size(); boolean isTumorCovered = tumorBaseCount >= tumorCoveredDepthThreshold; boolean isNormalCovered = normalBaseCount >= normalCoveredDepthThreshold; boolean isBaseCovered = isTumorCovered && isNormalCovered; if (!hasNormalBam) { isBaseCovered = isTumorCovered; } int tumorQ20BaseCount = tumorReadPile.getFilteredBaseCount(20); int normalQ20BaseCount = normalReadPile.getFilteredBaseCount(20); // calculate power double tumorPower = tumorPowerCalculator.cachingPowerCalculation(tumorBaseCount, MTAC.POWER_CONSTANT_AF); double normalPowerNoSNPPrior = normalNovelSitePowerCalculator.cachingPowerCalculation(normalBaseCount); double normalPowerWithSNPPrior = normalDbSNPSitePowerCalculator .cachingPowerCalculation(normalBaseCount); double normalPower = (germlineAtRisk) ? 
normalPowerWithSNPPrior : normalPowerNoSNPPrior; double combinedPower = tumorPower * normalPower; if (!hasNormalBam) { combinedPower = tumorPower; } int mapQ0Reads = tumorReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads() + normalReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads(); int totalReads = tumorReadPile.qualityScoreFilteredPileup.depthOfCoverage() + normalReadPile.qualityScoreFilteredPileup.depthOfCoverage(); // Test each of the possible alternate alleles for (final char altAllele : new char[] { 'A', 'C', 'G', 'T' }) { if (altAllele == upRef) { continue; } if (!MTAC.FORCE_OUTPUT && tumorReadPile.qualitySums.getCounts(altAllele) == 0) { continue; } CandidateMutation candidate = new CandidateMutation(rawContext.getLocation(), upRef); candidate.setSequenceContext(sequenceContext); candidate.setTumorSampleName(MTAC.TUMOR_SAMPLE_NAME); candidate.setNormalSampleName(MTAC.NORMAL_SAMPLE_NAME); candidate.setCovered(isBaseCovered); candidate.setPower(combinedPower); candidate.setTumorPower(tumorPower); candidate.setNormalPower(normalPower); candidate.setNormalPowerWithSNPPrior(normalPowerWithSNPPrior); candidate.setNormalPowerNoSNPPrior(normalPowerNoSNPPrior); candidate.setTumorQ20Count(tumorQ20BaseCount); candidate.setNormalQ20Count(normalQ20BaseCount); candidate.setInitialTumorNonRefQualitySum(tumorReadPile.qualitySums.getOtherQualities(upRef)); candidate.setAltAllele(altAllele); candidate.setMapQ0Reads(mapQ0Reads); candidate.setTotalReads(totalReads); candidate.setContaminationFraction(MTAC.FRACTION_CONTAMINATION); candidate.setPanelOfNormalsVC( panelOfNormalsVC.isEmpty() ? null : panelOfNormalsVC.iterator().next()); // if there are multiple, we're just grabbing the first candidate.setCosmicSite(!cosmicVC.isEmpty()); candidate.setDbsnpSite(!dbsnpVC.isEmpty()); candidate.setDbsnpVC(dbsnpVC.isEmpty() ? 
null : dbsnpVC.iterator().next()); candidate.setTumorF(tumorReadPile.estimateAlleleFraction(upRef, altAllele)); if (!MTAC.FORCE_OUTPUT && candidate.getTumorF() < MTAC.TUMOR_F_PRETEST) { continue; } candidate.setInitialTumorAltCounts(tumorReadPile.qualitySums.getCounts(altAllele)); candidate.setInitialTumorRefCounts(tumorReadPile.qualitySums.getCounts(upRef)); candidate.setInitialTumorAltQualitySum(tumorReadPile.qualitySums.getQualitySum(altAllele)); candidate.setInitialTumorRefQualitySum(tumorReadPile.qualitySums.getQualitySum(upRef)); double tumorLod = tumorReadPile.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0); candidate.setTumorLodFStar(tumorLod); candidate.setInitialTumorReadDepth(tumorReadPile.finalPileupReads.size()); candidate.setTumorInsertionCount(tumorReadPile.getInsertionsCount()); candidate.setTumorDeletionCount(tumorReadPile.getDeletionsCount()); if (candidate.getTumorLodFStar() < MTAC.INITIAL_TUMOR_LOD_THRESHOLD) { continue; } // calculate lod of contaminant double contaminantF = Math.min(contaminantAlternateFraction, candidate.getTumorF()); VariableAllelicRatioGenotypeLikelihoods contaminantLikelihoods = new VariableAllelicRatioGenotypeLikelihoods( upRef, contaminantF); List<PileupElement> peList = new ArrayList<PileupElement>( tumorReadPile.finalPileup.depthOfCoverage()); for (PileupElement pe : tumorReadPile.finalPileup) { peList.add(pe); } Collections.sort(peList, new PileupComparatorByAltRefQual((byte) altAllele)); int readsToKeep = (int) (peList.size() * contaminantAlternateFraction); for (PileupElement pe : peList) { byte base = pe.getBase(); if (pe.getBase() == altAllele) { // if we've retained all we need, then turn the remainder of alts to ref if (readsToKeep == 0) { base = (byte) upRef; } else { readsToKeep--; } } contaminantLikelihoods.add(base, pe.getQual()); } double[] refHetHom = LocusReadPile.extractRefHetHom(contaminantLikelihoods, upRef, altAllele); double contaminantLod = refHetHom[1] - refHetHom[0]; candidate.setContaminantLod(contaminantLod); final QualitySums normQs = normalReadPile.qualitySums; VariableAllelicRatioGenotypeLikelihoods normalGl = normalReadPile .calculateLikelihoods(normalReadPile.qualityScoreFilteredPileup); // use MAPQ0 reads candidate.setInitialNormalBestGenotype(normalReadPile.getBestGenotype(normalGl)); candidate.setInitialNormalLod(LocusReadPile.getRefVsAlt(normalGl, upRef, altAllele)); double normalF = Math.max(LocusReadPile .estimateAlleleFraction(normalReadPile.qualityScoreFilteredPileup, upRef, altAllele), MTAC.MINIMUM_NORMAL_ALLELE_FRACTION); candidate.setNormalF(normalF); candidate.setInitialNormalAltQualitySum(normQs.getQualitySum(altAllele)); candidate.setInitialNormalRefQualitySum(normQs.getQualitySum(upRef)); candidate.setNormalAltQualityScores(normQs.getBaseQualityScores(altAllele)); candidate.setNormalRefQualityScores(normQs.getBaseQualityScores(upRef)); candidate.setInitialNormalAltCounts(normQs.getCounts(altAllele)); candidate.setInitialNormalRefCounts(normQs.getCounts(upRef)); candidate.setInitialNormalReadDepth(normalReadPile.finalPileupReads.size()); // TODO: parameterize filtering Mate-Rescued Reads (if someone wants to disable this) final LocusReadPile t2 = filterReads(ref, tumorReadPile.finalPileup, true); // if there are no reads remaining, abandon this theory if (!MTAC.FORCE_OUTPUT && t2.finalPileupReads.size() == 0) { continue; } candidate.setInitialTumorAltCounts(t2.qualitySums.getCounts(altAllele)); candidate.setInitialTumorRefCounts(t2.qualitySums.getCounts(upRef)); 
candidate.setInitialTumorAltQualitySum(t2.qualitySums.getQualitySum(altAllele)); candidate.setInitialTumorRefQualitySum(t2.qualitySums.getQualitySum(upRef)); candidate.setTumorAltQualityScores(t2.qualitySums.getBaseQualityScores(altAllele)); candidate.setTumorRefQualityScores(t2.qualitySums.getBaseQualityScores(upRef)); VariableAllelicRatioGenotypeLikelihoods t2Gl = t2.calculateLikelihoods(t2.finalPileup); candidate.setInitialTumorLod(t2.getAltVsRef(t2Gl, upRef, altAllele)); candidate.setInitialTumorReadDepth(t2.finalPileupReads.size()); candidate.setTumorF(t2.estimateAlleleFraction(upRef, altAllele)); double tumorLod2 = t2.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0); candidate.setTumorLodFStar(tumorLod2); //TODO: clean up use of forward/reverse vs positive/negative (prefer the latter since GATK uses it) ReadBackedPileup forwardPileup = filterReads(ref, tumorReadPile.finalPileupPositiveStrand, true).finalPileupPositiveStrand; double f2forward = LocusReadPile.estimateAlleleFraction(forwardPileup, upRef, altAllele); candidate.setTumorLodFStarForward( t2.calculateAltVsRefLOD(forwardPileup, (byte) altAllele, f2forward, 0.0)); ReadBackedPileup reversePileup = filterReads(ref, tumorReadPile.finalPileupNegativeStrand, true).finalPileupNegativeStrand; double f2reverse = LocusReadPile.estimateAlleleFraction(reversePileup, upRef, altAllele); candidate.setTumorLodFStarReverse( t2.calculateAltVsRefLOD(reversePileup, (byte) altAllele, f2reverse, 0.0)); // calculate strand bias power candidate.setPowerToDetectPositiveStrandArtifact(strandArtifactPowerCalculator .cachingPowerCalculation(reversePileup.depthOfCoverage(), candidate.getTumorF())); candidate.setPowerToDetectNegativeStrandArtifact(strandArtifactPowerCalculator .cachingPowerCalculation(forwardPileup.depthOfCoverage(), candidate.getTumorF())); candidate.setStrandContingencyTable(SequenceUtils.getStrandContingencyTable(forwardPileup, reversePileup, (byte) upRef, (byte) altAllele)); ArrayList<PileupElement> mutantPileupElements = new ArrayList<PileupElement>(); ArrayList<PileupElement> referencePileupElements = new ArrayList<PileupElement>(); for (PileupElement p : t2.finalPileup) { final SAMRecord read = p.getRead(); final int offset = p.getOffset(); if (read.getReadString().charAt(offset) == altAllele) { mutantPileupElements.add(p); } else if (read.getReadString().charAt(offset) == upRef) { referencePileupElements.add(p); } else { // just drop the read... } } ReadBackedPileup mutantPileup = new ReadBackedPileupImpl(rawContext.getLocation(), mutantPileupElements); ReadBackedPileup referencePileup = new ReadBackedPileupImpl(rawContext.getLocation(), referencePileupElements); // TODO: shouldn't this be refAllele here? final LocusReadPile mutantPile = new LocusReadPile(mutantPileup, altAllele, 0, 0, MTAC.ENABLE_QSCORE_OUTPUT); final LocusReadPile refPile = new LocusReadPile(referencePileup, altAllele, 0, 0, MTAC.ENABLE_QSCORE_OUTPUT); // Set the maximum observed mapping quality score for the reference and alternate alleles int[] rmq = referencePileup.getMappingQuals(); candidate.setTumorRefMaxMapQ((rmq.length == 0) ? 0 : NumberUtils.max(rmq)); int[] amq = mutantPileup.getMappingQuals(); candidate.setTumorAltMaxMapQ((amq.length == 0) ? 
0 : NumberUtils.max(amq)); // start with just the tumor pile candidate.setTumorAltForwardOffsetsInRead(SequenceUtils.getForwardOffsetsInRead(mutantPileup)); candidate.setTumorAltReverseOffsetsInRead(SequenceUtils.getReverseOffsetsInRead(mutantPileup)); if (candidate.getTumorAltForwardOffsetsInRead().size() > 0) { double[] offsets = MuTectStats .convertIntegersToDoubles(candidate.getTumorAltForwardOffsetsInRead()); double median = MuTectStats.getMedian(offsets); candidate.setTumorForwardOffsetsInReadMedian(median); candidate.setTumorForwardOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median)); } if (candidate.getTumorAltReverseOffsetsInRead().size() > 0) { double[] offsets = MuTectStats .convertIntegersToDoubles(candidate.getTumorAltReverseOffsetsInRead()); double median = MuTectStats.getMedian(offsets); candidate.setTumorReverseOffsetsInReadMedian(median); candidate.setTumorReverseOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median)); } // test to see if the candidate should be rejected performRejection(candidate); messageByTumorLod.put(candidate.getInitialTumorLod(), candidate); } // if more than one site passes the tumor lod threshold for KEEP the fail the tri_allelic Site filter int passingCandidates = 0; for (CandidateMutation c : messageByTumorLod.values()) { if (c.getTumorLodFStar() >= MTAC.TUMOR_LOD_THRESHOLD) { passingCandidates++; } } if (passingCandidates > 1) { for (CandidateMutation c : messageByTumorLod.values()) { c.addRejectionReason("triallelic_site"); } } // write out the call stats for the "best" candidate if (!messageByTumorLod.isEmpty()) { CandidateMutation m = messageByTumorLod.lastEntry().getValue(); // only output passing calls OR rejected sites if ONLY_PASSING_CALLS is not specified if (!m.isRejected() || (m.isRejected() && !MTAC.ONLY_PASSING_CALLS)) { //out.println(callStatsGenerator.generateCallStats(m)); resultVCFOutInfos.add(callStatsGenerator.generateCallStats(m)); resultVCFRecords.add(VCFGenerator.generateVC(m)); } } } catch (MathException me) { throw new GATKException(me.getMessage()); } }
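messageByTumorLod keys each candidate by its initial tumor LOD, so values() can be scanned to count candidates that pass the threshold (the tri-allelic check) and lastEntry() picks the best-scoring one. A stripped-down sketch of that score-keyed pattern with made-up scores:

import java.util.TreeMap;

public class BestByScore {
    static class Candidate {
        final String name;
        final double lod;
        Candidate(String name, double lod) { this.name = name; this.lod = lod; }
    }

    public static void main(String[] args) {
        // Key candidates by score; candidates with identical scores would overwrite each other.
        TreeMap<Double, Candidate> byScore = new TreeMap<>();
        byScore.put(6.3, new Candidate("candidate-A", 6.3));
        byScore.put(12.1, new Candidate("candidate-B", 12.1));
        byScore.put(3.0, new Candidate("candidate-C", 3.0));

        // Scan values() (ascending score order) to count candidates above a threshold.
        double threshold = 6.0;
        int passing = 0;
        for (Candidate c : byScore.values()) {
            if (c.lod >= threshold) {
                passing++;
            }
        }
        System.out.println(passing + " candidates pass");                    // 2 candidates pass
        System.out.println("best = " + byScore.lastEntry().getValue().name); // candidate-B
    }
}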
From source file:io.warp10.continuum.gts.GTSHelper.java
public static List<GeoTimeSerie> chunk(GeoTimeSerie gts, long lastchunk, long chunkwidth, long chunkcount, String chunklabel, boolean keepempty, long overlap) throws WarpScriptException { if (overlap < 0 || overlap > chunkwidth) { throw new WarpScriptException("Overlap cannot exceed chunk width."); }// w w w . j a va2s . co m // // Check if 'chunklabel' exists in the GTS labels // Metadata metadata = gts.getMetadata(); if (metadata.getLabels().containsKey(chunklabel)) { throw new WarpScriptException( "Cannot operate on Geo Time Series which already have a label named '" + chunklabel + "'"); } TreeMap<Long, GeoTimeSerie> chunks = new TreeMap<Long, GeoTimeSerie>(); // // If GTS is bucketized, make sure bucketspan is less than boxwidth // boolean bucketized = GTSHelper.isBucketized(gts); if (bucketized) { if (gts.bucketspan > chunkwidth) { throw new WarpScriptException( "Cannot operate on Geo Time Series with a bucketspan greater than the chunk width."); } } else { // GTS is not bucketized and has 0 values, if lastchunk was 0, return an empty list as we // are unable to produce chunks if (0 == gts.values && 0L == lastchunk) { return new ArrayList<GeoTimeSerie>(); } } // // Set chunkcount to Integer.MAX_VALUE if it's 0 // boolean zeroChunkCount = false; if (0 == chunkcount) { chunkcount = Integer.MAX_VALUE; zeroChunkCount = true; } // // Sort timestamps in reverse order so we can produce all chunks in O(n) // GTSHelper.sort(gts, true); // // Loop on the chunks // // Index in the timestamp array int idx = 0; long bucketspan = gts.bucketspan; int bucketcount = gts.bucketcount; long lastbucket = gts.lastbucket; // // If lastchunk is 0, use lastbucket or the most recent tick // if (0 == lastchunk) { if (isBucketized(gts)) { lastchunk = lastbucket; } else { // Use the most recent tick lastchunk = gts.ticks[0]; // Make sure lastchunk is aligned on 'chunkwidth' boundary if (0 != (lastchunk % chunkwidth)) { lastchunk = lastchunk - (lastchunk % chunkwidth) + chunkwidth; } } } for (long i = 0; i < chunkcount; i++) { // If we have no more values and were not specified a chunk count, exit the loop, we're done if (idx >= gts.values && zeroChunkCount) { break; } // Compute chunk bounds long chunkend = lastchunk - i * chunkwidth; long chunkstart = chunkend - chunkwidth + 1; GeoTimeSerie chunkgts = new GeoTimeSerie(lastbucket, bucketcount, bucketspan, 16); // Set metadata for the GTS chunkgts.setMetadata(metadata); // Add 'chunklabel' chunkgts.getMetadata().putToLabels(chunklabel, Long.toString(chunkend)); if (bucketized) { // Chunk is outside the GTS, it will be empty if (lastbucket < chunkstart || chunkend <= lastbucket - (bucketcount * bucketspan)) { // Add the (empty) chunk if keepempty is true if (keepempty || overlap > 0) { chunks.put(chunkend, chunkgts); } continue; } // Set the bucketized parameters in the GTS // If bucketspan does not divide chunkwidth, chunks won't be bucketized if (0 == chunkwidth % bucketspan) { chunkgts.bucketspan = bucketspan; chunkgts.lastbucket = chunkend; chunkgts.bucketcount = (int) ((chunkend - chunkstart + 1) / bucketspan); } else { chunkgts.bucketspan = 0L; chunkgts.lastbucket = 0L; chunkgts.bucketspan = 0; } } // // Add the datapoints which fall within the current chunk // // Advance until the current tick is before 'chunkend' while (idx < gts.values && gts.ticks[idx] > chunkend) { idx++; } // We've exhausted the values if (idx >= gts.values) { // only add chunk if it's not empty or empty with 'keepempty' set to true if (0 != chunkgts.values || (keepempty || overlap > 
0)) { chunks.put(chunkend, chunkgts); } continue; } // The current tick is before the beginning of the current chunk if (gts.ticks[idx] < chunkstart) { // only add chunk if it's not empty or empty with 'keepempty' set to true if (0 != chunkgts.values || (keepempty || overlap > 0)) { chunks.put(chunkend, chunkgts); } continue; } while (idx < gts.values && gts.ticks[idx] >= chunkstart) { GTSHelper.setValue(chunkgts, GTSHelper.tickAtIndex(gts, idx), GTSHelper.locationAtIndex(gts, idx), GTSHelper.elevationAtIndex(gts, idx), GTSHelper.valueAtIndex(gts, idx), false); idx++; } // only add chunk if it's not empty or empty with 'keepempty' set to true if (0 != chunkgts.values || (keepempty || overlap > 0)) { chunks.put(chunkend, chunkgts); } } // // Handle overlapping is need be. // We need to iterate over all ticks and add datapoints to each GTS they belong to // if (overlap > 0) { // // Check if we need to add a first and a last chunk // long ts = GTSHelper.tickAtIndex(gts, 0); if (ts <= chunks.firstKey() - chunkwidth) { Entry<Long, GeoTimeSerie> currentFirst = chunks.firstEntry(); GeoTimeSerie firstChunk = currentFirst.getValue().cloneEmpty(); if (GTSHelper.isBucketized(currentFirst.getValue())) { firstChunk.lastbucket = firstChunk.lastbucket - firstChunk.bucketspan; } chunks.put(currentFirst.getKey() - chunkwidth, firstChunk); } ts = GTSHelper.tickAtIndex(gts, gts.values - 1); if (ts >= chunks.lastKey() - chunkwidth + 1 - overlap) { Entry<Long, GeoTimeSerie> currentLast = chunks.lastEntry(); GeoTimeSerie lastChunk = currentLast.getValue().cloneEmpty(); if (GTSHelper.isBucketized(currentLast.getValue())) { lastChunk.lastbucket = lastChunk.lastbucket + lastChunk.bucketspan; } chunks.put(currentLast.getKey() + chunkwidth, lastChunk); } // // Put all entries in a list so we can access them randomly // List<Entry<Long, GeoTimeSerie>> allchunks = new ArrayList<Entry<Long, GeoTimeSerie>>(chunks.entrySet()); int[] currentSizes = new int[allchunks.size()]; for (int i = 0; i < currentSizes.length; i++) { currentSizes[i] = allchunks.get(i).getValue().values; } // // Iterate over chunks, completing with prev and next overlaps // Remember the timestamps are in reverse order so far. // for (int i = 0; i < allchunks.size(); i++) { GeoTimeSerie current = allchunks.get(i).getValue(); long lowerBound = allchunks.get(i).getKey() - chunkwidth + 1 - overlap; long upperBound = allchunks.get(i).getKey() + overlap; if (i > 0) { GeoTimeSerie prev = allchunks.get(i - 1).getValue(); for (int j = 0; j < currentSizes[i - 1]; j++) { long timestamp = GTSHelper.tickAtIndex(prev, j); if (timestamp < lowerBound) { break; } GTSHelper.setValue(current, timestamp, GTSHelper.locationAtIndex(prev, j), GTSHelper.elevationAtIndex(prev, j), GTSHelper.valueAtIndex(prev, j), false); } } if (i < allchunks.size() - 1) { GeoTimeSerie next = allchunks.get(i + 1).getValue(); for (int j = currentSizes[i + 1] - 1; j >= 0; j--) { long timestamp = GTSHelper.tickAtIndex(next, j); if (timestamp > upperBound) { break; } GTSHelper.setValue(current, timestamp, GTSHelper.locationAtIndex(next, j), GTSHelper.elevationAtIndex(next, j), GTSHelper.valueAtIndex(next, j), false); } } } } List<GeoTimeSerie> result = new ArrayList<GeoTimeSerie>(); for (GeoTimeSerie g : chunks.values()) { if (!keepempty && 0 == g.values) { continue; } result.add(g); } return result; }
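chunk() buckets datapoints into a TreeMap&lt;Long, GeoTimeSerie&gt; keyed by each chunk's end timestamp, and the final loop over chunks.values() emits the chunks in chronological order, skipping empty ones unless keepempty is set. A compact sketch of time-window bucketing followed by an ordered values() sweep, with made-up timestamps:

import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

public class TimeChunking {
    public static void main(String[] args) {
        long chunkWidth = 60_000L; // 1-minute chunks (hypothetical)
        long[] ticks = { 125_000L, 61_000L, 59_999L, 3_000L, 130_000L };

        // Chunk end timestamp -> datapoints falling in that chunk.
        TreeMap<Long, List<Long>> chunks = new TreeMap<>();
        for (long tick : ticks) {
            long chunkEnd = ((tick / chunkWidth) + 1) * chunkWidth - 1;
            chunks.computeIfAbsent(chunkEnd, k -> new ArrayList<>()).add(tick);
        }

        // values() walks the chunks in ascending (chronological) order of their end timestamp.
        List<List<Long>> result = new ArrayList<>();
        for (List<Long> chunk : chunks.values()) {
            if (chunk.isEmpty()) {
                continue; // mirror the keepempty == false behavior (never hit in this sketch)
            }
            result.add(chunk);
        }
        System.out.println(result); // [[59999, 3000], [61000], [125000, 130000]]
    }
}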
From source file:org.opendaylight.vtn.manager.it.northbound.VtnNorthboundIT.java
/**
 * Test flow filter APIs.
 *
 * @param baseUri  Absolute URI for test.
 * @param cookie   An arbitrary integer to create test data.
 * @param ifPath   The location of the virtual interface to configure
 *                 flow filter. {@code null} means that the target virtual
 *                 node is not a virtual interface.
 * @return A {@link JSONObject} instance that contains all flow filters
 *         configured into the URI specified by {@code base}.
 * @throws JSONException An error occurred.
 */
private JSONObject testFlowFilterAPI(String baseUri, int cookie, VInterfacePath ifPath) throws JSONException {
    LOG.info("Starting flow filter JAX-RS client: {}", baseUri);

    // Get all flow filters.
    JSONObject json = getJSONObject(baseUri);
    JSONArray array = json.getJSONArray("flowfilter");
    Assert.assertEquals(0, array.length());

    TreeMap<Integer, JSONObject> allFilters = new TreeMap<Integer, JSONObject>();

    // Create PASS flow filter with all supported actions.
    byte[] macAddr1 = { 0x00, 0x11, 0x22, 0x33, 0x44, (byte) cookie, };
    byte[] macAddr2 = { (byte) 0xf0, (byte) 0xfa, (byte) 0xfb, (byte) 0xfc, (byte) cookie, (byte) 0xfe, };
    JSONArray actions = new JSONArray()
            .put(createJSONObject("dlsrc", "address", ByteUtils.toHexString(macAddr1)))
            .put(createJSONObject("dldst", "address", ByteUtils.toHexString(macAddr2)))
            .put(createJSONObject("vlanpcp", "priority", cookie & MASK_VLAN_PCP))
            .put(createJSONObject("inet4src", "address", "192.168.20.254"))
            .put(createJSONObject("inet4dst", "address", "10.20.30.40"))
            .put(createJSONObject("dscp", "dscp", cookie & MASK_IP_DSCP))
            .put(createJSONObject("icmptype", "type", cookie & MASK_ICMP))
            .put(createJSONObject("icmpcode", "code", ~cookie & MASK_ICMP));
    JSONObject empty = new JSONObject();
    JSONObject type = new JSONObject().put("pass", empty);
    JSONObject pass = new JSONObject().put("condition", "cond_1").put("filterType", type)
            .put("actions", actions);
    int passIdx = (cookie + 1) & MASK_FLOWFILTER_INDEX;

    // Try to get a flow filter that has not been created yet.
    String uri = createRelativeURI(baseUri, String.valueOf(passIdx));
    getJsonResult(uri);
    assertResponse(HTTP_NO_CONTENT);

    // Create PASS filter.
    getJsonResult(uri, HTTP_PUT, pass.toString());
    assertResponse(HTTP_CREATED);
    Assert.assertEquals(uri, httpLocation);
    json = getJSONObject(uri);
    pass.put("index", passIdx);
    assertEquals(pass, json);
    allFilters.put(passIdx, json);
    getJsonResult(uri, HTTP_PUT, pass.toString());
    assertResponse(HTTP_NO_CONTENT);

    // Create one more PASS filter with 3 actions.
    byte[] macAddr3 = { (byte) 0xa0, (byte) 0xb0, (byte) cookie, (byte) 0xd0, (byte) 0xe0, (byte) 0xf0, };
    type = new JSONObject().put("pass", empty);
    actions = new JSONArray().put(createJSONObject("dlsrc", "address", ByteUtils.toHexString(macAddr3)))
            .put(createJSONObject("vlanpcp", "priority", (cookie + 13) & MASK_VLAN_PCP))
            .put(createJSONObject("icmptype", "type", (cookie - 31) & MASK_ICMP));
    JSONObject pass1 = new JSONObject().put("condition", "cond_2").put("filterType", type)
            .put("actions", actions);
    int passIdx1 = (cookie + 7777) & MASK_FLOWFILTER_INDEX;
    String pass1Uri = createRelativeURI(baseUri, String.valueOf(passIdx1));
    getJsonResult(pass1Uri, HTTP_PUT, pass1.toString());
    assertResponse(HTTP_CREATED);
    Assert.assertEquals(pass1Uri, httpLocation);
    json = getJSONObject(pass1Uri);
    pass1.put("index", passIdx1);
    assertEquals(pass1, json);

    // Create DROP filter without actions.
    type = new JSONObject().put("drop", empty);

    // Index in JSON object should be ignored.
    JSONObject drop = new JSONObject().put("index", Integer.MAX_VALUE).put("condition", "cond_3")
            .put("filterType", type);
    int dropIdx = (cookie + 65535) & MASK_FLOWFILTER_INDEX;
    uri = createRelativeURI(baseUri, String.valueOf(dropIdx));
    getJsonResult(uri, HTTP_PUT, drop.toString());
    assertResponse(HTTP_CREATED);
    Assert.assertEquals(uri, httpLocation);
    json = getJSONObject(uri);
    drop.put("index", dropIdx);
    assertEquals(drop, json);
    getJsonResult(uri, HTTP_PUT, drop.toString());
    assertResponse(HTTP_NO_CONTENT);

    // Append 2 actions into the DROP filter.
    byte[] macAddr4 = { (byte) 0x00, (byte) 0xaa, (byte) 0xbb, (byte) cookie, (byte) 0xdd, (byte) 0xee, };
    actions = new JSONArray().put(createJSONObject("dscp", "dscp", (cookie * 7) & MASK_IP_DSCP))
            .put(createJSONObject("dldst", "address", ByteUtils.toHexString(macAddr4)));
    drop.put("actions", actions);
    getJsonResult(uri, HTTP_PUT, drop.toString());
    assertResponse(HTTP_OK);
    json = getJSONObject(uri);
    assertEquals(drop, json);
    allFilters.put(dropIdx, json);

    // Create REDIRECT filter, specifying one action.
    // The VTN name in the destination should always be ignored.
    JSONObject destination = new JSONObject().put("tenant", "vtn_100").put("bridge", "bridge_10")
            .put("interface", "if_20");
    type = createJSONObject("redirect", "destination", destination, "output", true);
    actions = new JSONArray().put(createJSONObject("vlanpcp", "priority", (cookie * 3) & MASK_VLAN_PCP));
    JSONObject redirect = new JSONObject().put("condition", "cond_" + cookie).put("filterType", type)
            .put("actions", actions);
    int redirectIdx = (cookie + 5000) & MASK_FLOWFILTER_INDEX;
    uri = createRelativeURI(baseUri, String.valueOf(redirectIdx));
    getJsonResult(uri, HTTP_PUT, redirect.toString());
    assertResponse(HTTP_CREATED);
    Assert.assertEquals(uri, httpLocation);
    json = getJSONObject(uri);
    redirect.put("index", redirectIdx);
    destination.remove("tenant");
    assertEquals(redirect, json);
    getJsonResult(uri, HTTP_PUT, redirect.toString());
    assertResponse(HTTP_NO_CONTENT);

    // Update destination of the REDIRECT filter.
    destination = new JSONObject().put("terminal", "term_123").put("interface", "if_1");
    type = createJSONObject("redirect", "destination", destination, "output", false);
    redirect = new JSONObject().put("condition", "cond_" + cookie).put("filterType", type)
            .put("actions", actions);
    getJsonResult(uri, HTTP_PUT, redirect.toString());
    assertResponse(HTTP_OK);
    json = getJSONObject(uri);
    redirect.put("index", redirectIdx);
    assertEquals(redirect, json);
    allFilters.put(redirectIdx, json);
    getJsonResult(uri, HTTP_PUT, redirect.toString());
    assertResponse(HTTP_NO_CONTENT);

    // BAD_REQUEST tests.
    // Specify invalid index.
    int[] badIndex = { -1, 0, 65536, 65537, 100000 };
    for (int idx : badIndex) {
        uri = createRelativeURI(baseUri, String.valueOf(idx));
        getJsonResult(uri, HTTP_PUT, pass.toString());
        assertResponse(HTTP_BAD_REQUEST);
        getJsonResult(uri);
        assertResponse(HTTP_NO_CONTENT);
    }

    // No flow condition name.
    JSONObject bad = new JSONObject().put("filterType", new JSONObject().put("pass", empty));
    getJsonResult(pass1Uri, HTTP_PUT, bad.toString());
    assertResponse(HTTP_BAD_REQUEST);

    // No flow filter type.
    bad = new JSONObject().put("condition", "cond_1");
    getJsonResult(pass1Uri, HTTP_PUT, bad.toString());
    assertResponse(HTTP_BAD_REQUEST);

    // Specify invalid action.
    JSONObject[] badActions = {
            // Bad action parameter.
            createJSONObject("dlsrc", "address", "bad_MAC_address"),
            createJSONObject("dlsrc", "address", "00:00:00:00:00:00"),
            createJSONObject("dldst", "address", "01:00:00:00:00:00"),
            createJSONObject("dldst", "address", "ff:ff:ff:ff:ff:ff"),
            createJSONObject("vlanpcp", "priority", 8),
            createJSONObject("vlanpcp", "priority", -1),
            createJSONObject("inet4src", "address", "bad_ip_address"),
            createJSONObject("inet4src", "address", "100.200.300.400"),
            createJSONObject("inet4src", "address", "::1"),
            createJSONObject("inet4dst", "address", "bad_ip_address"),
            createJSONObject("inet4dst", "address", "100.200.1.256"),
            createJSONObject("inet4dst", "address", "2400:683c:af13:801::2034"),
            createJSONObject("dscp", "dscp", 64),
            createJSONObject("dscp", "dscp", -1),
            createJSONObject("tpsrc", "port", -1),
            createJSONObject("tpsrc", "port", 65536),
            createJSONObject("tpdst", "port", -1),
            createJSONObject("tpdst", "port", 65536),
            createJSONObject("icmptype", "type", -1),
            createJSONObject("icmptype", "type", 256),
            createJSONObject("icmpcode", "code", -1),
            createJSONObject("icmpcode", "code", 256),
    };
    bad.put("filterType", new JSONObject().put("pass", empty));
    for (JSONObject act : badActions) {
        actions = new JSONArray().put(act);
        bad.put("actions", actions);
        getJsonResult(pass1Uri, HTTP_PUT, bad.toString());
        assertResponse(HTTP_BAD_REQUEST);
    }

    // Invalid destination of REDIRECT filter.
    String badName = "_badname";
    String emptyName = "";
    String longName = "12345678901234567890123456789012";
    List<JSONObject> badDestinations = new ArrayList<JSONObject>();
    badDestinations.add(null);
    badDestinations.add(empty);

    // No interface name.
    badDestinations.add(new JSONObject().put("bridge", "bridge_1"));
    badDestinations.add(new JSONObject().put("terminal", "vterm_1"));

    // Invalid node name.
    badDestinations.add(new JSONObject().put("bridge", badName).put("interface", "if_1"));
    badDestinations.add(new JSONObject().put("bridge", emptyName).put("interface", "if_1"));
    badDestinations.add(new JSONObject().put("terminal", longName).put("interface", "if_1"));

    // Invalid interface name.
    badDestinations.add(new JSONObject().put("bridge", "bridge_1").put("interface", badName));
    badDestinations.add(new JSONObject().put("bridge", "bridge_1").put("interface", emptyName));
    badDestinations.add(new JSONObject().put("bridge", "bridge_1").put("interface", longName));
    badDestinations.add(new JSONObject().put("terminal", "vterm_1").put("interface", badName));
    badDestinations.add(new JSONObject().put("terminal", "vterm_1").put("interface", emptyName));
    badDestinations.add(new JSONObject().put("terminal", "vterm_1").put("interface", longName));

    if (ifPath != null) {
        // Self redirection.
        String name = ifPath.getTenantNodeName();
        String ifName = ifPath.getInterfaceName();
        String key = (ifPath instanceof VBridgeIfPath) ? "bridge" : "terminal";
        badDestinations.add(new JSONObject().put(key, name).put("interface", ifName));
    }

    for (JSONObject dest : badDestinations) {
        JSONObject badType = createJSONObject("redirect", "destination", dest, "output", false);
        bad = new JSONObject().put("condition", "cond_" + cookie).put("filterType", badType);
        getJsonResult(pass1Uri, HTTP_PUT, bad.toString());
        assertResponse(HTTP_BAD_REQUEST);
    }

    // Ensure that PASS filter at index 7777 was not modified.
    json = getJSONObject(pass1Uri);
    assertEquals(pass1, json);

    // Remove actions in PASS filter at index 7777.
    pass1.remove("actions");
    getJsonResult(pass1Uri, HTTP_PUT, pass1.toString());
    assertResponse(HTTP_OK);
    json = getJSONObject(pass1Uri);
    assertEquals(pass1, json);

    // Delete PASS filter at index 7777.
    getJsonResult(pass1Uri, HTTP_DELETE);
    assertResponse(HTTP_OK);
    getJsonResult(pass1Uri, HTTP_DELETE);
    assertResponse(HTTP_NO_CONTENT);

    // Get all flow filters again.
    JSONObject all = getJSONObject(baseUri);
    array = new JSONArray(allFilters.values());
    JSONObject expected = new JSONObject().put("flowfilter", array);
    assertEquals(expected, all);

    return all;
}
From source file:org.broadinstitute.cga.tools.gatk.walkers.cancer.mutect.MuTect.java
@Override
public Integer map(final RefMetaDataTracker tracker, final ReferenceContext ref,
        final AlignmentContext rawContext) {
    if (MTAC.NOOP)
        return 0;

    TreeMap<Double, CandidateMutation> messageByTumorLod = new TreeMap<Double, CandidateMutation>();

    ReadBackedPileup pileup = rawContext.getBasePileup();
    int numberOfReads = pileup.depthOfCoverage();
    binReadsProcessed += numberOfReads;

    if (binReadsProcessed >= 1000000) {
        long time = System.currentTimeMillis();
        long elapsedTime = time - lastTime;
        lastTime = time;

        totalReadsProcessed += binReadsProcessed;
        binReadsProcessed = 0;

        logger.info(String.format("[MUTECT] Processed %d reads in %d ms", totalReadsProcessed, elapsedTime));
    }

    // an optimization to speed things up when there is no coverage
    if (!MTAC.FORCE_OUTPUT && numberOfReads == 0) {
        return -1;
    }

    // get sequence context around mutation
    String sequenceContext = SequenceUtils.createSequenceContext(ref, 3);

    // only process bases where the reference is [ACGT], because the FASTA for HG18 has N, M and R!
    final char upRef = Character.toUpperCase(ref.getBaseAsChar());
    if (upRef != 'A' && upRef != 'C' && upRef != 'G' && upRef != 'T') {
        return -1;
    }

    try {
        Map<SampleType, ReadBackedPileup> pileupMap = getPileupsBySampleType(pileup);
        final LocusReadPile tumorReadPile = new LocusReadPile(pileupMap.get(SampleType.TUMOR), upRef,
                MTAC.MIN_QSCORE, MIN_QSUM_QSCORE, false, MTAC.ARTIFACT_DETECTION_MODE,
                MTAC.ENABLE_QSCORE_OUTPUT);
        final LocusReadPile normalReadPile = new LocusReadPile(pileupMap.get(SampleType.NORMAL), upRef,
                MTAC.MIN_QSCORE, 0, this.USE_MAPQ0_IN_NORMAL_QSCORE, true, MTAC.ENABLE_QSCORE_OUTPUT);

        Collection<VariantContext> panelOfNormalsVC = tracker.getValues(normalPanelRod,
                rawContext.getLocation());
        Collection<VariantContext> cosmicVC = tracker.getValues(cosmicRod, rawContext.getLocation());
        Collection<VariantContext> dbsnpVC = tracker.getValues(dbsnpRod, rawContext.getLocation());

        // remove the effect of cosmic from dbSNP
        boolean germlineAtRisk = (!dbsnpVC.isEmpty() && cosmicVC.isEmpty());

        // compute coverage flags
        int tumorCoveredDepthThreshold = 14;
        int normalCoveredDepthThreshold = (germlineAtRisk) ? 19 : 8;
        if (!hasNormalBam) {
            normalCoveredDepthThreshold = 0;
        }

        int tumorBaseCount = tumorReadPile.finalPileupReads.size();
        int normalBaseCount = normalReadPile.finalPileupReads.size();
        boolean isTumorCovered = tumorBaseCount >= tumorCoveredDepthThreshold;
        boolean isNormalCovered = normalBaseCount >= normalCoveredDepthThreshold;
        boolean isBaseCovered = isTumorCovered && isNormalCovered;
        if (!hasNormalBam) {
            isBaseCovered = isTumorCovered;
        }

        stdCovWriter.writeCoverage(rawContext, isBaseCovered);
        int tumorQ20BaseCount = tumorReadPile.getFilteredBaseCount(20);
        int normalQ20BaseCount = normalReadPile.getFilteredBaseCount(20);
        q20CovWriter.writeCoverage(rawContext, tumorQ20BaseCount >= 20 && normalQ20BaseCount >= 20);
        tumorDepthWriter.writeCoverage(rawContext, tumorBaseCount);
        normalDepthWriter.writeCoverage(rawContext, normalBaseCount);

        // calculate power
        double tumorPower = tumorPowerCalculator.cachingPowerCalculation(tumorBaseCount,
                MTAC.POWER_CONSTANT_AF);
        double normalPowerNoSNPPrior = normalNovelSitePowerCalculator.cachingPowerCalculation(normalBaseCount);
        double normalPowerWithSNPPrior = normalDbSNPSitePowerCalculator
                .cachingPowerCalculation(normalBaseCount);
        double normalPower = (germlineAtRisk) ? normalPowerWithSNPPrior : normalPowerNoSNPPrior;

        double combinedPower = tumorPower * normalPower;
        if (!hasNormalBam) {
            combinedPower = tumorPower;
        }
        powerWriter.writeCoverage(rawContext, combinedPower);

        int mapQ0Reads = tumorReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads()
                + normalReadPile.qualityScoreFilteredPileup.getNumberOfMappingQualityZeroReads();
        int totalReads = tumorReadPile.qualityScoreFilteredPileup.depthOfCoverage()
                + normalReadPile.qualityScoreFilteredPileup.depthOfCoverage();

        // Test each of the possible alternate alleles
        for (final char altAllele : new char[] { 'A', 'C', 'G', 'T' }) {
            if (altAllele == upRef) {
                continue;
            }
            if (!MTAC.FORCE_OUTPUT && tumorReadPile.qualitySums.getCounts(altAllele) == 0) {
                continue;
            }

            CandidateMutation candidate = new CandidateMutation(rawContext.getLocation(), upRef);
            candidate.setSequenceContext(sequenceContext);
            candidate.setTumorSampleName(MTAC.TUMOR_SAMPLE_NAME);
            candidate.setNormalSampleName(MTAC.NORMAL_SAMPLE_NAME);
            candidate.setCovered(isBaseCovered);
            candidate.setPower(combinedPower);
            candidate.setTumorPower(tumorPower);
            candidate.setNormalPower(normalPower);
            candidate.setNormalPowerWithSNPPrior(normalPowerWithSNPPrior);
            candidate.setNormalPowerNoSNPPrior(normalPowerNoSNPPrior);
            candidate.setTumorQ20Count(tumorQ20BaseCount);
            candidate.setNormalQ20Count(normalQ20BaseCount);
            candidate.setInitialTumorNonRefQualitySum(tumorReadPile.qualitySums.getOtherQualities(upRef));
            candidate.setAltAllele(altAllele);
            candidate.setMapQ0Reads(mapQ0Reads);
            candidate.setTotalReads(totalReads);
            candidate.setContaminationFraction(MTAC.FRACTION_CONTAMINATION);
            // if there are multiple, we're just grabbing the first
            candidate.setPanelOfNormalsVC(
                    panelOfNormalsVC.isEmpty() ? null : panelOfNormalsVC.iterator().next());
            candidate.setCosmicSite(!cosmicVC.isEmpty());
            candidate.setDbsnpSite(!dbsnpVC.isEmpty());
            candidate.setDbsnpVC(dbsnpVC.isEmpty() ? null : dbsnpVC.iterator().next());

            candidate.setTumorF(tumorReadPile.estimateAlleleFraction(upRef, altAllele));
            if (!MTAC.FORCE_OUTPUT && candidate.getTumorF() < MTAC.TUMOR_F_PRETEST) {
                continue;
            }

            if (++candidatesInspected % 1000 == 0) {
                logger.info(String.format("[MUTECT] Inspected %d potential candidates", candidatesInspected));
            }

            candidate.setInitialTumorAltCounts(tumorReadPile.qualitySums.getCounts(altAllele));
            candidate.setInitialTumorRefCounts(tumorReadPile.qualitySums.getCounts(upRef));
            candidate.setInitialTumorAltQualitySum(tumorReadPile.qualitySums.getQualitySum(altAllele));
            candidate.setInitialTumorRefQualitySum(tumorReadPile.qualitySums.getQualitySum(upRef));

            double tumorLod = tumorReadPile.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0);
            candidate.setTumorLodFStar(tumorLod);

            candidate.setInitialTumorReadDepth(tumorReadPile.finalPileupReads.size());
            candidate.setTumorInsertionCount(tumorReadPile.getInsertionsCount());
            candidate.setTumorDeletionCount(tumorReadPile.getDeletionsCount());

            if (candidate.getTumorLodFStar() < MTAC.INITIAL_TUMOR_LOD_THRESHOLD) {
                continue;
            }

            // calculate lod of contaminant
            double contaminantF = Math.min(contaminantAlternateFraction, candidate.getTumorF());
            VariableAllelicRatioGenotypeLikelihoods contaminantLikelihoods =
                    new VariableAllelicRatioGenotypeLikelihoods(upRef, contaminantF);

            List<PileupElement> peList = new ArrayList<PileupElement>(
                    tumorReadPile.finalPileup.depthOfCoverage());
            for (PileupElement pe : tumorReadPile.finalPileup) {
                peList.add(pe);
            }

            Collections.sort(peList, new PileupComparatorByAltRefQual((byte) altAllele));
            int readsToKeep = (int) (peList.size() * contaminantAlternateFraction);

            for (PileupElement pe : peList) {
                byte base = pe.getBase();
                if (pe.getBase() == altAllele) {
                    // if we've retained all we need, then turn the remainder of alts to ref
                    if (readsToKeep == 0) {
                        base = (byte) upRef;
                    } else {
                        readsToKeep--;
                    }
                }

                contaminantLikelihoods.add(base, pe.getQual());
            }
            double[] refHetHom = LocusReadPile.extractRefHetHom(contaminantLikelihoods, upRef, altAllele);
            double contaminantLod = refHetHom[1] - refHetHom[0];
            candidate.setContaminantLod(contaminantLod);

            final QualitySums normQs = normalReadPile.qualitySums;

            VariableAllelicRatioGenotypeLikelihoods normalGl = normalReadPile
                    .calculateLikelihoods(normalReadPile.qualityScoreFilteredPileup); // use MAPQ0 reads
            candidate.setInitialNormalBestGenotype(normalReadPile.getBestGenotype(normalGl));
            candidate.setInitialNormalLod(LocusReadPile.getRefVsAlt(normalGl, upRef, altAllele));

            double normalF = Math.max(LocusReadPile.estimateAlleleFraction(
                    normalReadPile.qualityScoreFilteredPileup, upRef, altAllele),
                    MTAC.MINIMUM_NORMAL_ALLELE_FRACTION);
            candidate.setNormalF(normalF);

            candidate.setInitialNormalAltQualitySum(normQs.getQualitySum(altAllele));
            candidate.setInitialNormalRefQualitySum(normQs.getQualitySum(upRef));
            candidate.setNormalAltQualityScores(normQs.getBaseQualityScores(altAllele));
            candidate.setNormalRefQualityScores(normQs.getBaseQualityScores(upRef));
            candidate.setInitialNormalAltCounts(normQs.getCounts(altAllele));
            candidate.setInitialNormalRefCounts(normQs.getCounts(upRef));
            candidate.setInitialNormalReadDepth(normalReadPile.finalPileupReads.size());

            // TODO: parameterize filtering Mate-Rescued Reads (if someone wants to disable this)
            final LocusReadPile t2 = filterReads(ref, tumorReadPile.finalPileup, true);

            // if there are no reads remaining, abandon this theory
            if (!MTAC.FORCE_OUTPUT && t2.finalPileupReads.size() == 0) {
                continue;
            }

            candidate.setInitialTumorAltCounts(t2.qualitySums.getCounts(altAllele));
            candidate.setInitialTumorRefCounts(t2.qualitySums.getCounts(upRef));
            candidate.setInitialTumorAltQualitySum(t2.qualitySums.getQualitySum(altAllele));
            candidate.setInitialTumorRefQualitySum(t2.qualitySums.getQualitySum(upRef));
            candidate.setTumorAltQualityScores(t2.qualitySums.getBaseQualityScores(altAllele));
            candidate.setTumorRefQualityScores(t2.qualitySums.getBaseQualityScores(upRef));

            VariableAllelicRatioGenotypeLikelihoods t2Gl = t2.calculateLikelihoods(t2.finalPileup);
            candidate.setInitialTumorLod(t2.getAltVsRef(t2Gl, upRef, altAllele));
            candidate.setInitialTumorReadDepth(t2.finalPileupReads.size());

            candidate.setTumorF(t2.estimateAlleleFraction(upRef, altAllele));
            double tumorLod2 = t2.calculateAltVsRefLOD((byte) altAllele, candidate.getTumorF(), 0);
            candidate.setTumorLodFStar(tumorLod2);

            // TODO: clean up use of forward/reverse vs positive/negative (prefer the latter since GATK uses it)
            ReadBackedPileup forwardPileup = filterReads(ref, tumorReadPile.finalPileupPositiveStrand,
                    true).finalPileupPositiveStrand;
            double f2forward = LocusReadPile.estimateAlleleFraction(forwardPileup, upRef, altAllele);
            candidate.setTumorLodFStarForward(
                    t2.calculateAltVsRefLOD(forwardPileup, (byte) altAllele, f2forward, 0.0));

            ReadBackedPileup reversePileup = filterReads(ref, tumorReadPile.finalPileupNegativeStrand,
                    true).finalPileupNegativeStrand;
            double f2reverse = LocusReadPile.estimateAlleleFraction(reversePileup, upRef, altAllele);
            candidate.setTumorLodFStarReverse(
                    t2.calculateAltVsRefLOD(reversePileup, (byte) altAllele, f2reverse, 0.0));

            // calculate strand bias power
            candidate.setPowerToDetectPositiveStrandArtifact(strandArtifactPowerCalculator
                    .cachingPowerCalculation(reversePileup.depthOfCoverage(), candidate.getTumorF()));
            candidate.setPowerToDetectNegativeStrandArtifact(strandArtifactPowerCalculator
                    .cachingPowerCalculation(forwardPileup.depthOfCoverage(), candidate.getTumorF()));

            candidate.setStrandContingencyTable(SequenceUtils.getStrandContingencyTable(forwardPileup,
                    reversePileup, (byte) upRef, (byte) altAllele));

            ArrayList<PileupElement> mutantPileupElements = new ArrayList<PileupElement>();
            ArrayList<PileupElement> referencePileupElements = new ArrayList<PileupElement>();

            for (PileupElement p : t2.finalPileup) {
                final SAMRecord read = p.getRead();
                final int offset = p.getOffset();

                if (read.getReadString().charAt(offset) == altAllele) {
                    mutantPileupElements.add(p);
                } else if (read.getReadString().charAt(offset) == upRef) {
                    referencePileupElements.add(p);
                } else {
                    // just drop the read...
                }
            }

            ReadBackedPileup mutantPileup = new ReadBackedPileupImpl(rawContext.getLocation(),
                    mutantPileupElements);
            ReadBackedPileup referencePileup = new ReadBackedPileupImpl(rawContext.getLocation(),
                    referencePileupElements);

            // TODO: shouldn't this be refAllele here?
            final LocusReadPile mutantPile = new LocusReadPile(mutantPileup, altAllele, 0, 0,
                    MTAC.ENABLE_QSCORE_OUTPUT);
            final LocusReadPile refPile = new LocusReadPile(referencePileup, altAllele, 0, 0,
                    MTAC.ENABLE_QSCORE_OUTPUT);

            // Set the maximum observed mapping quality score for the reference and alternate alleles
            int[] rmq = referencePileup.getMappingQuals();
            candidate.setTumorRefMaxMapQ((rmq.length == 0) ? 0 : NumberUtils.max(rmq));

            int[] amq = mutantPileup.getMappingQuals();
            candidate.setTumorAltMaxMapQ((amq.length == 0) ? 0 : NumberUtils.max(amq));

            // start with just the tumor pile
            candidate.setTumorAltForwardOffsetsInRead(SequenceUtils.getForwardOffsetsInRead(mutantPileup));
            candidate.setTumorAltReverseOffsetsInRead(SequenceUtils.getReverseOffsetsInRead(mutantPileup));

            if (candidate.getTumorAltForwardOffsetsInRead().size() > 0) {
                double[] offsets = MuTectStats
                        .convertIntegersToDoubles(candidate.getTumorAltForwardOffsetsInRead());
                double median = MuTectStats.getMedian(offsets);
                candidate.setTumorForwardOffsetsInReadMedian(median);
                candidate.setTumorForwardOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median));
            }

            if (candidate.getTumorAltReverseOffsetsInRead().size() > 0) {
                double[] offsets = MuTectStats
                        .convertIntegersToDoubles(candidate.getTumorAltReverseOffsetsInRead());
                double median = MuTectStats.getMedian(offsets);
                candidate.setTumorReverseOffsetsInReadMedian(median);
                candidate.setTumorReverseOffsetsInReadMad(MuTectStats.calculateMAD(offsets, median));
            }

            // test to see if the candidate should be rejected
            performRejection(candidate);

            if (MTAC.FORCE_ALLELES) {
                out.println(callStatsGenerator.generateCallStats(candidate));
            } else {
                messageByTumorLod.put(candidate.getInitialTumorLod(), candidate);
            }
        }

        // if more than one site passes the tumor lod threshold for KEEP, then fail the tri_allelic site filter
        int passingCandidates = 0;
        for (CandidateMutation c : messageByTumorLod.values()) {
            if (c.getTumorLodFStar() >= MTAC.TUMOR_LOD_THRESHOLD) {
                passingCandidates++;
            }
        }

        if (passingCandidates > 1) {
            for (CandidateMutation c : messageByTumorLod.values()) {
                c.addRejectionReason("triallelic_site");
            }
        }

        // write out the call stats for the "best" candidate
        if (!messageByTumorLod.isEmpty()) {
            CandidateMutation m = messageByTumorLod.lastEntry().getValue();

            // only output passing calls OR rejected sites if ONLY_PASSING_CALLS is not specified
            if (!m.isRejected() || (m.isRejected() && !MTAC.ONLY_PASSING_CALLS)) {
                out.println(callStatsGenerator.generateCallStats(m));
                if (vcf != null) {
                    vcf.add(VCFGenerator.generateVC(m));
                }
            }
        }
        return -1;
    } catch (Throwable t) {
        System.err.println("Error processing " + rawContext.getContig() + ":" + rawContext.getPosition());
        t.printStackTrace(System.err);
        throw new RuntimeException(t);
    }
}
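Here the TreeMap keyed by tumor LOD serves two purposes: values() walks every candidate allele at the site, first to count how many clear the LOD threshold and then to flag them all when more than one does, while lastEntry() picks out the highest-LOD candidate for output. The sketch below shows that selection idiom in isolation, under simplifying assumptions: strings stand in for CandidateMutation objects, the threshold is applied to the map key via a tailMap() view rather than to a field on each candidate as the walker does, and all names and numbers are illustrative.

import java.util.Map;
import java.util.TreeMap;

public class BestCandidateSketch {
    public static void main(String[] args) {
        // Candidates keyed by a per-candidate score (the tumor LOD in the walker); higher is better.
        TreeMap<Double, String> byScore = new TreeMap<Double, String>();
        byScore.put(6.3, "C>A");
        byScore.put(12.9, "C>T");
        byScore.put(7.1, "C>G");

        double threshold = 6.5; // illustrative cutoff

        // The tail view keeps only candidates at or above the cutoff; its values()
        // still iterate in ascending score order.
        int passing = byScore.tailMap(threshold).values().size();
        if (passing > 1) {
            // More than one passing candidate: flag every candidate at this site.
            for (String candidate : byScore.values()) {
                System.out.println("flagging " + candidate + " as a multi-allelic call");
            }
        }

        // lastEntry() hands back the highest-scoring candidate directly.
        Map.Entry<Double, String> best = byScore.lastEntry();
        System.out.println("best candidate: " + best.getValue() + " (score " + best.getKey() + ")");
    }
}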