List of usage examples for java.util.PriorityQueue.poll()
public E poll()
Retrieves and removes the head of this queue, or returns null if this queue is empty.
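Before the longer real-world excerpts below, a minimal, self-contained sketch of the poll() contract: the head of the queue is the least element according to natural ordering or the supplied comparator, and polling an empty queue returns null rather than throwing. The class name PollBasics is invented purely for illustration.

import java.util.Comparator;
import java.util.PriorityQueue;

public class PollBasics {
    public static void main(String[] args) {
        // Natural ordering: poll() always removes the smallest remaining element.
        PriorityQueue<Integer> minHeap = new PriorityQueue<>();
        minHeap.add(5);
        minHeap.add(1);
        minHeap.add(3);
        System.out.println(minHeap.poll()); // 1
        System.out.println(minHeap.poll()); // 3

        // A comparator redefines which element is the head of the queue.
        PriorityQueue<String> longestFirst =
                new PriorityQueue<>(Comparator.comparingInt(String::length).reversed());
        longestFirst.add("a");
        longestFirst.add("abc");
        System.out.println(longestFirst.poll()); // "abc"

        // Unlike remove(), poll() returns null instead of throwing on an empty queue.
        System.out.println(new PriorityQueue<Integer>().poll()); // null
    }
}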
From source file: com.joliciel.talismane.parser.TransitionBasedParserImpl.java

@Override
public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences) {
    MONITOR.startTask("parseSentence");
    try {
        long startTime = (new Date()).getTime();
        int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000;
        int minFreeMemoryBytes = minFreeMemory * KILOBYTE;

        TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence();

        TreeMap<Integer, PriorityQueue<ParseConfiguration>> heaps =
                new TreeMap<Integer, PriorityQueue<ParseConfiguration>>();

        PriorityQueue<ParseConfiguration> heap0 = new PriorityQueue<ParseConfiguration>();
        for (PosTagSequence posTagSequence : posTagSequences) {
            // add an initial ParseConfiguration for each postag sequence
            ParseConfiguration initialConfiguration = this.getParserServiceInternal()
                    .getInitialConfiguration(posTagSequence);
            initialConfiguration.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(initialConfiguration);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding initial posTagSequence: " + posTagSequence);
            }
        }
        heaps.put(0, heap0);

        PriorityQueue<ParseConfiguration> backupHeap = null;

        PriorityQueue<ParseConfiguration> finalHeap = null;
        PriorityQueue<ParseConfiguration> terminalHeap = new PriorityQueue<ParseConfiguration>();
        while (heaps.size() > 0) {
            Entry<Integer, PriorityQueue<ParseConfiguration>> heapEntry = heaps.pollFirstEntry();
            PriorityQueue<ParseConfiguration> currentHeap = heapEntry.getValue();
            int currentHeapIndex = heapEntry.getKey();
            if (LOG.isTraceEnabled()) {
                LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: "
                        + heapEntry.getValue().size());
            }

            boolean finished = false;
            // systematically set the final heap here, just in case we exit "naturally" with no more heaps
            finalHeap = heapEntry.getValue();
            backupHeap = new PriorityQueue<ParseConfiguration>();

            // we jump out when either (a) all tokens have been attached or (b) we go over the max alloted time
            ParseConfiguration topConf = currentHeap.peek();
            if (topConf.isTerminal()) {
                LOG.trace("Exiting with terminal heap: " + heapEntry.getKey() + ", size: "
                        + heapEntry.getValue().size());
                finished = true;
            }

            if (earlyStop && terminalHeap.size() >= beamWidth) {
                LOG.debug("Early stop activated and terminal heap contains " + beamWidth + " entries. Exiting.");
                finalHeap = terminalHeap;
                finished = true;
            }

            long analysisTime = (new Date()).getTime() - startTime;
            if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) {
                LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText());
                LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds.");
                finished = true;
            }

            if (minFreeMemory > 0) {
                long freeMemory = Runtime.getRuntime().freeMemory();
                if (freeMemory < minFreeMemoryBytes) {
                    LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText());
                    LOG.info("Min free memory (bytes):" + minFreeMemoryBytes);
                    LOG.info("Current free memory (bytes): " + freeMemory);
                    finished = true;
                }
            }

            if (finished) {
                break;
            }

            // limit the breadth to K
            int maxSequences = currentHeap.size() > this.beamWidth ? this.beamWidth : currentHeap.size();

            int j = 0;
            while (currentHeap.size() > 0) {
                ParseConfiguration history = currentHeap.poll();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":");
                    LOG.trace(history.toString());
                    LOG.trace("Score: " + df.format(history.getScore()));
                    LOG.trace(history.getPosTagSequence());
                }

                List<Decision<Transition>> decisions = new ArrayList<Decision<Transition>>();

                // test the positive rules on the current configuration
                boolean ruleApplied = false;
                if (parserPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (ParserRule rule : parserPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.toString());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<Transition> positiveRuleDecision = TalismaneSession
                                        .getTransitionSystem().createDefaultDecision(rule.getTransition());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting transition to: "
                                            + rule.getTransition().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current configuration
                    List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("feature analyse");
                    try {
                        for (ParseConfigurationFeature<?> feature : this.parseFeatures) {
                            MONITOR.startTask(feature.getName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(history, env);
                                if (featureResult != null)
                                    parseFeatureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(feature.getName());
                            }
                        }
                        if (LOG_FEATURES.isTraceEnabled()) {
                            for (FeatureResult<?> featureResult : parseFeatureResults) {
                                LOG_FEATURES.trace(featureResult.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("feature analyse");
                    }

                    // evaluate the feature results using the decision maker
                    MONITOR.startTask("make decision");
                    try {
                        decisions = this.decisionMaker.decide(parseFeatureResults);

                        for (ClassificationObserver<Transition> observer : this.observers) {
                            observer.onAnalyse(history, parseFeatureResults, decisions);
                        }

                        List<Decision<Transition>> decisionShortList =
                                new ArrayList<Decision<Transition>>(decisions.size());
                        for (Decision<Transition> decision : decisions) {
                            if (decision.getProbability() > MIN_PROB_TO_STORE)
                                decisionShortList.add(decision);
                        }
                        decisions = decisionShortList;
                    } finally {
                        MONITOR.endTask("make decision");
                    }

                    // apply the negative rules
                    Set<Transition> eliminatedTransitions = new HashSet<Transition>();
                    if (parserNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (ParserRule rule : parserNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.toString());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedTransitions.addAll(rule.getTransitions());
                                    if (LOG.isTraceEnabled()) {
                                        for (Transition eliminatedTransition : rule.getTransitions())
                                            LOG.trace("Rule applies. Eliminating transition: "
                                                    + eliminatedTransition.getCode());
                                    }
                                }
                            }

                            if (eliminatedTransitions.size() > 0) {
                                List<Decision<Transition>> decisionShortList =
                                        new ArrayList<Decision<Transition>>();
                                for (Decision<Transition> decision : decisions) {
                                    if (!eliminatedTransitions.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }
                }

                // has a positive rule been applied?
                boolean transitionApplied = false;

                // add new configuration to the heap, one for each valid transition
                MONITOR.startTask("heap sort");
                try {
                    // Why apply all decisions here? Why not just the top N (where N = beamwidth)?
                    // Answer: because we're not always adding solutions to the same heap
                    // And yet: a decision here can only do one of two things: process a token (heap+1000), or add a non-processing transition (heap+1)
                    // So, if we've already applied N decisions of each type, we should be able to stop
                    for (Decision<Transition> decision : decisions) {
                        Transition transition = decision.getOutcome();
                        if (LOG.isTraceEnabled())
                            LOG.trace("Outcome: " + transition.getCode() + ", " + decision.getProbability());

                        if (transition.checkPreconditions(history)) {
                            transitionApplied = true;
                            ParseConfiguration configuration =
                                    this.parserServiceInternal.getConfiguration(history);
                            if (decision.isStatistical())
                                configuration.addDecision(decision);
                            transition.apply(configuration);

                            int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration) * 1000;
                            if (configuration.isTerminal()) {
                                nextHeapIndex = Integer.MAX_VALUE;
                            } else {
                                while (nextHeapIndex <= currentHeapIndex)
                                    nextHeapIndex++;
                            }

                            PriorityQueue<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex);
                            if (nextHeap == null) {
                                if (configuration.isTerminal())
                                    nextHeap = terminalHeap;
                                else
                                    nextHeap = new PriorityQueue<ParseConfiguration>();
                                heaps.put(nextHeapIndex, nextHeap);
                                if (LOG.isTraceEnabled())
                                    LOG.trace("Created heap with index: " + nextHeapIndex);
                            }
                            nextHeap.add(configuration);
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Added configuration with score " + configuration.getScore()
                                        + " to heap: " + nextHeapIndex + ", total size: " + nextHeap.size());
                            }

                            configuration.clearMemory();
                        } else {
                            if (LOG.isTraceEnabled())
                                LOG.trace("Cannot apply transition: doesn't meet pre-conditions");
                            // just in case the we run out of both heaps and analyses, we build this backup heap
                            backupHeap.add(history);
                        } // does transition meet pre-conditions?
                    } // next transition
                } finally {
                    MONITOR.endTask("heap sort");
                }

                if (transitionApplied) {
                    j++;
                } else {
                    LOG.trace("No transitions could be applied: not counting this history as part of the beam");
                }

                // beam width test
                if (j == maxSequences)
                    break;
            } // next history
        } // next atomic index

        // return the best sequences on the heap
        List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>();
        int i = 0;

        if (finalHeap.isEmpty())
            finalHeap = backupHeap;

        while (!finalHeap.isEmpty()) {
            bestConfigurations.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }
        if (LOG.isDebugEnabled()) {
            for (ParseConfiguration finalConfiguration : bestConfigurations) {
                LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString());
                LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence());
                LOG.debug("Transitions: " + finalConfiguration.getTransitions());
                LOG.debug("Decisions: " + finalConfiguration.getDecisions());
                if (LOG.isTraceEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    for (Decision<Transition> decision : finalConfiguration.getDecisions()) {
                        sb.append(" * ");
                        sb.append(df.format(decision.getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getTransitions().size());
                    LOG.trace(sb.toString());

                    sb = new StringBuilder();
                    sb.append(" * PosTag sequence score ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getScore()));
                    sb.append(" = ");
                    for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) {
                        sb.append(" * ");
                        sb.append(df.format(posTaggedToken.getDecision().getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getPosTagSequence().size());
                    LOG.trace(sb.toString());

                    sb = new StringBuilder();
                    sb.append(" * Token sequence score = ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore()));
                    LOG.trace(sb.toString());
                }
            }
        }
        return bestConfigurations;
    } finally {
        MONITOR.endTask("parseSentence");
    }
}
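The excerpt above is dense, so here is a stripped-down sketch of the PriorityQueue pattern it relies on: a beam search that keeps one heap per progression index in a TreeMap, polls the best configurations off the lowest-index heap up to the beam width, and pushes successor states onto later heaps. All names here (BeamSearchSketch, State, the costs) are invented for illustration and are not part of the Talismane code.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.TreeMap;

public class BeamSearchSketch {
    // A made-up state with a cost; PriorityQueue uses compareTo(), so poll()
    // returns the lowest-cost state first.
    static class State implements Comparable<State> {
        final int position;   // how far this state has progressed
        final double cost;
        State(int position, double cost) { this.position = position; this.cost = cost; }
        List<State> expand() { // two invented successor moves
            List<State> next = new ArrayList<>();
            next.add(new State(position + 1, cost + 1.0));
            next.add(new State(position + 1, cost + 2.5));
            return next;
        }
        public int compareTo(State o) { return Double.compare(cost, o.cost); }
    }

    public static State search(int beamWidth, int goal) {
        // One heap per position, processed in position order, like the heaps TreeMap above.
        TreeMap<Integer, PriorityQueue<State>> heaps = new TreeMap<>();
        heaps.computeIfAbsent(0, k -> new PriorityQueue<>()).add(new State(0, 0.0));
        while (!heaps.isEmpty()) {
            PriorityQueue<State> heap = heaps.pollFirstEntry().getValue();
            if (heap.peek().position >= goal) {
                return heap.poll();                         // best terminal state
            }
            int taken = 0;
            while (!heap.isEmpty() && taken < beamWidth) {  // limit breadth to the beam width
                State s = heap.poll();                      // best remaining state first
                for (State n : s.expand()) {
                    heaps.computeIfAbsent(n.position, k -> new PriorityQueue<>()).add(n);
                }
                taken++;
            }
        }
        return null;
    }

    public static void main(String[] args) {
        State best = search(2, 5);
        System.out.println("best cost: " + (best == null ? "none" : best.cost));
    }
}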
From source file: org.caleydo.neo4j.plugins.kshortestpaths.KShortestPathsAlgo.java

public List<WeightedPath> run(Node sourceNode, Node targetNode, int k, IPathReadyListener onPathReady) {
    StopWatch w = new StopWatch();
    w.start();

    // Calculate shortest path first
    List<WeightedPath> paths = new ArrayList<>(k);
    profile("start", w);
    WeightedPath shortestPath = shortestPathFinder.findSinglePath(sourceNode, targetNode);
    if (shortestPath == null)
        return paths;
    profile("initial disjkra", w);
    PriorityQueue<WeightedPath> pathCandidates = new PriorityQueue<WeightedPath>(20,
            new Comparator<WeightedPath>() {
                @Override
                public int compare(WeightedPath o1, WeightedPath o2) {
                    return Double.compare(o1.weight(), o2.weight());
                }
            });
    Set<Integer> pathCandidateHashes = new HashSet<>();
    if (onPathReady != null) {
        onPathReady.onPathReady(shortestPath);
    }
    paths.add(shortestPath);
    pathCandidateHashes.add(generatePathHash(shortestPath));
    for (int i = 1; i < k; i++) {
        WeightedPath prevPath = paths.get(i - 1);
        for (Node spurNode : prevPath.nodes()) {
            if (spurNode.getId() == prevPath.endNode().getId())
                break;
            WeightedPath rootPath = getSubPathTo(prevPath, spurNode);
            for (Path path : paths) {
                Iterator<Relationship> pathIterator = path.relationships().iterator();
                boolean containsRootPath = true;
                // Test if the existing shortest path starts with the root path
                for (Relationship relationship : rootPath.relationships()) {
                    if (!pathIterator.hasNext()) {
                        containsRootPath = false;
                        break;
                    }
                    Relationship pathRelationship = pathIterator.next();
                    if (relationship.getId() != pathRelationship.getId()) {
                        containsRootPath = false;
                        break;
                    }
                }
                // If so, set edge weight of following edge in that path to infinity
                if (containsRootPath) {
                    if (pathIterator.hasNext()) {
                        Relationship r = pathIterator.next();
                        costEvaluator.addInvalidRelationship(r);
                        //profile("invalid: "+r,w);
                    }
                }
            }
            // Simulate removal of root path nodes (except spur node) by setting all their edge weights to
            // infinity
            Set<Long> badIds = new HashSet<Long>();
            for (Node rootPathNode : rootPath.nodes()) {
                if (rootPathNode.getId() != spurNode.getId()) {
                    badIds.add(rootPathNode.getId());
                    //for (Relationship relationship : getRelationships(rootPathNode)) {
                    //    costEvaluator.addInvalidRelationship(relationship);
                    //}
                    //profile("invalids: "+rootPathNode.getRelationships(),w);
                }
            }
            expander.setExtraIgnoreNodes(badIds);
            profile("Find next path", w);
            WeightedPath spurPath = shortestPathFinder.findSinglePath(spurNode, targetNode);
            profile("Found next path", w);
            if (spurPath != null && !Double.isInfinite(spurPath.weight())) {
                WeightedPath pathCandidate = concatenate(rootPath, spurPath);
                Integer pathHash = generatePathHash(pathCandidate);
                if (!pathCandidateHashes.contains(pathHash)) {
                    pathCandidates.add(pathCandidate);
                    pathCandidateHashes.add(pathHash);
                }
            }
            // Restore edges
            costEvaluator.clearInvalidRelationships();
            expander.setExtraIgnoreNodes(null);
        }

        if (pathCandidates.isEmpty())
            break;

        WeightedPath nextBest = pathCandidates.poll();
        profile("flush path", w);
        if (onPathReady != null) {
            onPathReady.onPathReady(nextBest);
        }
        paths.add(nextBest);
    }
    profile("done", w);
    return paths;
}
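The poll() call above is the candidate-selection step of Yen's k-shortest-paths algorithm: spur paths accumulate in a weight-ordered PriorityQueue, deduplicated by a hash set, and each outer iteration polls the cheapest unseen candidate as the next accepted path. A hypothetical, self-contained sketch of just that queue discipline follows; the Candidate type, routes, and weights are made up for illustration.

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;

public class CandidateQueueSketch {
    // Invented stand-in for a weighted path.
    static class Candidate {
        final String route;
        final double weight;
        Candidate(String route, double weight) { this.route = route; this.weight = weight; }
        public String toString() { return route + " (" + weight + ")"; }
    }

    public static void main(String[] args) {
        // Cheapest candidate first, like the weight comparator in the excerpt.
        PriorityQueue<Candidate> candidates =
                new PriorityQueue<>(Comparator.comparingDouble((Candidate c) -> c.weight));
        Set<String> seen = new HashSet<>();   // mirrors the path-hash dedup in the excerpt

        for (Candidate c : Arrays.asList(
                new Candidate("A-B-D", 4.0),
                new Candidate("A-C-D", 3.0),
                new Candidate("A-C-D", 3.0),   // duplicate, filtered out by 'seen'
                new Candidate("A-B-C-D", 5.5))) {
            if (seen.add(c.route)) {
                candidates.add(c);
            }
        }

        // poll() repeatedly yields the cheapest remaining candidate.
        while (!candidates.isEmpty()) {
            System.out.println(candidates.poll());   // A-C-D, then A-B-D, then A-B-C-D
        }
    }
}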
From source file: com.joliciel.jochre.search.highlight.FixedSizeSnippetFinder.java

@Override
public List<Snippet> findSnippets(int docId, Set<String> fields, Set<HighlightTerm> highlightTerms,
        int maxSnippets, int snippetSize) {
    try {
        Document doc = indexSearcher.doc(docId);
        JochreIndexDocument jochreDoc = searchService.getJochreIndexDocument(indexSearcher, docId);

        // find best snippet for each term
        PriorityQueue<Snippet> heap = new PriorityQueue<Snippet>();

        int i = -1;
        for (HighlightTerm term : highlightTerms) {
            i++;
            String content = jochreDoc.getContents();
            CoordinateStorage coordinateStorage = jochreDoc.getCoordinateStorage();
            if (term.getStartOffset() >= content.length()) {
                String title = doc.get("title");
                String startPage = doc.get("startPage");
                String endPage = doc.get("endPage");
                LOG.debug("Content: " + content);
                throw new RuntimeException(term.toString() + " cannot fit into contents for doc " + title
                        + ", pages " + startPage + " to " + endPage + ", length: " + content.length());
            }
            List<HighlightTerm> snippetTerms = new ArrayList<HighlightTerm>();
            snippetTerms.add(term);
            int j = -1;
            boolean foundImage = false;
            for (HighlightTerm otherTerm : highlightTerms) {
                j++;
                if (j <= i)
                    continue;
                if (otherTerm.getImageIndex() != term.getImageIndex()) {
                    if (foundImage)
                        break;
                    else
                        continue;
                }
                foundImage = true;

                if (otherTerm.getStartOffset() < term.getStartOffset() + snippetSize) {
                    snippetTerms.add(otherTerm);
                } else {
                    break;
                }
            }
            HighlightTerm lastTerm = snippetTerms.get(snippetTerms.size() - 1);

            int middle = (term.getStartOffset() + lastTerm.getEndOffset()) / 2;
            int start = middle - (snippetSize / 2);
            int end = middle + (snippetSize / 2);
            if (start > term.getStartOffset())
                start = term.getStartOffset();
            if (end < lastTerm.getEndOffset())
                end = lastTerm.getEndOffset();

            if (start < 0)
                start = 0;
            if (end > content.length())
                end = content.length();

            for (int k = start; k >= 0; k--) {
                if (Character.isWhitespace(content.charAt(k))) {
                    start = k + 1;
                    break;
                }
            }
            for (int k = end; k < content.length(); k++) {
                if (Character.isWhitespace(content.charAt(k))) {
                    end = k;
                    break;
                }
            }

            int imageStartOffset = coordinateStorage.getImageStartOffset(term.getImageIndex());
            int imageEndOffset = Integer.MAX_VALUE;
            if (term.getImageIndex() + 1 < coordinateStorage.getImageCount()) {
                imageEndOffset = coordinateStorage.getImageStartOffset(term.getImageIndex() + 1);
            }

            if (start < imageStartOffset)
                start = imageStartOffset;
            if (end > imageEndOffset)
                end = imageEndOffset;

            Snippet snippet = new Snippet(docId, term.getField(), start, end);
            snippet.setHighlightTerms(snippetTerms);
            heap.add(snippet);
        }

        // if we have no snippets, add one per field type
        if (heap.isEmpty()) {
            String content = jochreDoc.getContents();
            int end = snippetSize * maxSnippets;
            if (end > content.length())
                end = content.length();
            for (int k = end; k < content.length(); k++) {
                if (Character.isWhitespace(content.charAt(k))) {
                    end = k;
                    break;
                }
            }
            Snippet snippet = new Snippet(docId, fields.iterator().next(), 0, end);
            heap.add(snippet);
        }

        List<Snippet> snippets = new ArrayList<Snippet>(maxSnippets);
        while (snippets.size() < maxSnippets && !heap.isEmpty()) {
            Snippet snippet = heap.poll();
            boolean hasOverlap = false;
            for (Snippet otherSnippet : snippets) {
                if (otherSnippet.hasOverlap(snippet))
                    hasOverlap = true;
            }
            if (!hasOverlap)
                snippets.add(snippet);
        }

        for (Snippet snippet : snippets) {
            LOG.debug("Added snippet: " + snippet.toJson());
        }
        return snippets;
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
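Here the heap holds one candidate snippet per highlight term, and poll() hands back the highest-priority snippet while overlapping ones are skipped. A self-contained sketch of that selection loop follows; Span is an invented stand-in for the Snippet class, which presumably implements Comparable, since the excerpt places it in a PriorityQueue without an explicit comparator.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class SnippetSelectionSketch {
    // Invented span type with a score; compareTo() puts the best-scoring span at the head.
    static class Span implements Comparable<Span> {
        final int start, end;
        final double score;
        Span(int start, int end, double score) { this.start = start; this.end = end; this.score = score; }
        boolean overlaps(Span o) { return start < o.end && o.start < end; }
        public int compareTo(Span o) { return Double.compare(o.score, score); } // best score first
        public String toString() { return "[" + start + "," + end + ") score=" + score; }
    }

    public static void main(String[] args) {
        PriorityQueue<Span> heap = new PriorityQueue<>();
        heap.add(new Span(0, 80, 2.0));
        heap.add(new Span(60, 140, 3.0));   // overlaps the first span
        heap.add(new Span(200, 280, 1.5));

        int maxSnippets = 2;
        List<Span> kept = new ArrayList<>();
        // poll() returns the best remaining span; overlapping spans are skipped,
        // mirroring the hasOverlap() check in the excerpt above.
        while (kept.size() < maxSnippets && !heap.isEmpty()) {
            Span candidate = heap.poll();
            boolean overlaps = false;
            for (Span s : kept) {
                if (s.overlaps(candidate)) { overlaps = true; break; }
            }
            if (!overlaps) kept.add(candidate);
        }
        kept.forEach(System.out::println);   // [60,140) then [200,280)
    }
}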
From source file: com.uber.stream.kafka.mirrormaker.manager.core.ControllerHelixManager.java

public void scaleCurrentCluster() throws Exception {
    int oldTotalNumWorker = 0;
    int newTotalNumWorker = 0;
    Map<String, Integer> _routeWorkerOverrides = getRouteWorkerOverride();
    for (String pipeline : _pipelineToInstanceMap.keySet()) {
        LOGGER.info("Start rescale pipeline: {}", pipeline);
        PriorityQueue<InstanceTopicPartitionHolder> newItphQueue = new PriorityQueue<>(1,
                InstanceTopicPartitionHolder.totalWorkloadComparator(_pipelineWorkloadMap));
        // TODO: what if routeId is not continuous
        int nextRouteId = _pipelineToInstanceMap.get(pipeline).size();
        for (InstanceTopicPartitionHolder itph : _pipelineToInstanceMap.get(pipeline)) {
            if (itph.getTotalNumPartitions() > _maxNumPartitionsPerRoute) {
                LOGGER.info(
                        "Checking route {} with controller {} and topics {} since it exceeds maxNumPartitionsPerRoute {}",
                        itph.getRouteString(), itph.getInstanceName(), itph.getServingTopicPartitionSet(),
                        _maxNumPartitionsPerRoute);
                while (itph.getTotalNumPartitions() > _maxNumPartitionsPerRoute) {
                    // Only one topic left, do nothing
                    if (itph.getNumServingTopicPartitions() == 1) {
                        LOGGER.info("Only one topic {} in route {}, do nothing",
                                itph.getServingTopicPartitionSet().iterator().next(), itph.getRouteString());
                        break;
                    }

                    // Get the topic with largest number of partitions
                    TopicPartition tpToMove = new TopicPartition("tmp", -1);
                    for (TopicPartition tp : itph.getServingTopicPartitionSet()) {
                        if (tp.getPartition() > tpToMove.getPartition()) {
                            tpToMove = tp;
                        }
                    }

                    // If existing lightest route cannot fit the largest topic to move
                    if (newItphQueue.isEmpty() || newItphQueue.peek().getTotalNumPartitions()
                            + tpToMove.getPartition() > _initMaxNumPartitionsPerRoute) {
                        try {
                            InstanceTopicPartitionHolder newHolder = createNewRoute(pipeline, nextRouteId);

                            _helixAdmin.setResourceIdealState(_helixClusterName, tpToMove.getTopic(),
                                    IdealStateBuilder.resetCustomIdealStateFor(
                                            _helixAdmin.getResourceIdealState(_helixClusterName, tpToMove.getTopic()),
                                            tpToMove.getTopic(), itph.getRouteString(), newHolder.getRouteString(),
                                            newHolder.getInstanceName()));

                            itph.removeTopicPartition(tpToMove);
                            newHolder.addTopicPartition(tpToMove);
                            newItphQueue.add(newHolder);
                            nextRouteId++;
                        } catch (Exception e) {
                            LOGGER.error("Got exception when create a new route when rebalancing, abandon!", e);
                            throw new Exception("Got exception when create a new route when rebalancing, abandon!", e);
                        }
                    } else {
                        InstanceTopicPartitionHolder newHolder = newItphQueue.poll();

                        _helixAdmin.setResourceIdealState(_helixClusterName, tpToMove.getTopic(),
                                IdealStateBuilder.resetCustomIdealStateFor(
                                        _helixAdmin.getResourceIdealState(_helixClusterName, tpToMove.getTopic()),
                                        tpToMove.getTopic(), itph.getRouteString(), newHolder.getRouteString(),
                                        newHolder.getInstanceName()));

                        itph.removeTopicPartition(tpToMove);
                        newHolder.addTopicPartition(tpToMove);
                        newItphQueue.add(newHolder);
                    }
                }
            }
            newItphQueue.add(itph);
        }

        // After moving topics, scale workers based on workload
        int rescaleFailedCount = 0;
        for (InstanceTopicPartitionHolder itph : newItphQueue) {
            oldTotalNumWorker += itph.getWorkerSet().size();
            String routeString = itph.getRouteString();
            int initWorkerCount = _initMaxNumWorkersPerRoute;
            if (_routeWorkerOverrides.containsKey(routeString)
                    && _routeWorkerOverrides.get(routeString) > initWorkerCount) {
                initWorkerCount = _routeWorkerOverrides.get(routeString);
            }

            String hostname = getHostname(itph.getInstanceName());
            try {
                String result = HttpClientUtils.getData(_httpClient, _requestConfig, hostname, _controllerPort,
                        "/admin/workloadinfo");
                ControllerWorkloadInfo workloadInfo = JSONObject.parseObject(result, ControllerWorkloadInfo.class);

                if (workloadInfo != null && workloadInfo.getNumOfExpectedWorkers() != 0) {
                    TopicWorkload totalWorkload = workloadInfo.getTopicWorkload();
                    _pipelineWorkloadMap.put(itph.getRouteString(), totalWorkload);
                    int expectedNumWorkers = workloadInfo.getNumOfExpectedWorkers();
                    LOGGER.info("Current {} workers in route {}, expect {} workers", itph.getWorkerSet().size(),
                            itph.getRouteString(), expectedNumWorkers);
                    int actualExpectedNumWorkers = getActualExpectedNumWorkers(expectedNumWorkers, initWorkerCount);
                    LOGGER.info("Current {} workers in route {}, actual expect {} workers",
                            itph.getWorkerSet().size(), itph.getRouteString(), actualExpectedNumWorkers);

                    if (actualExpectedNumWorkers > itph.getWorkerSet().size()) {
                        LOGGER.info("Current {} workers in route {}, actual expect {} workers, add {} workers",
                                itph.getWorkerSet().size(), itph.getRouteString(), actualExpectedNumWorkers,
                                actualExpectedNumWorkers - itph.getWorkerSet().size());
                        // TODO: handle exception
                        _workerHelixManager.addWorkersToMirrorMaker(itph, itph.getRoute().getTopic(),
                                itph.getRoute().getPartition(), actualExpectedNumWorkers - itph.getWorkerSet().size());
                    }

                    if (actualExpectedNumWorkers < itph.getWorkerSet().size()) {
                        LOGGER.info("Current {} workers in route {}, actual expect {} workers, remove {} workers",
                                itph.getWorkerSet().size(), itph.getRouteString(), actualExpectedNumWorkers,
                                itph.getWorkerSet().size() - actualExpectedNumWorkers);
                        // TODO: handle exception
                        _workerHelixManager.removeWorkersToMirrorMaker(itph, itph.getRoute().getTopic(),
                                itph.getRoute().getPartition(), itph.getWorkerSet().size() - actualExpectedNumWorkers);
                    }
                    newTotalNumWorker += actualExpectedNumWorkers;
                } else {
                    LOGGER.warn("Get workload on {} for route: {} returns 0. No change on number of workers",
                            hostname, itph.getRouteString());
                    newTotalNumWorker += itph.getWorkerSet().size();
                    rescaleFailedCount++;
                }
            } catch (Exception e) {
                LOGGER.error(String.format(
                        "Get workload error when connecting to %s for route %s. No change on number of workers",
                        hostname, itph.getRouteString()), e);
                newTotalNumWorker += itph.getWorkerSet().size();
                rescaleFailedCount++;
            }
        }
        _pipelineToInstanceMap.put(pipeline, newItphQueue);
        _rescaleFailedCount.inc(rescaleFailedCount - _rescaleFailedCount.getCount());
    }
    LOGGER.info("oldTotalNumWorker: {}, newTotalNumWorker: {}", oldTotalNumWorker, newTotalNumWorker);
}
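In this example the PriorityQueue is ordered by route workload, so poll() always yields the lightest route as the target for a topic partition being moved off an overloaded route, and the holder is re-added after it changes. A minimal, hypothetical sketch of that rebalancing step follows; the Route type and partition counts are invented for illustration.

import java.util.Comparator;
import java.util.PriorityQueue;

public class RebalanceSketch {
    // Invented route holder; the excerpt orders real routes by a workload comparator.
    static class Route {
        final String name;
        int partitions;
        Route(String name, int partitions) { this.name = name; this.partitions = partitions; }
    }

    public static void main(String[] args) {
        PriorityQueue<Route> lightestFirst =
                new PriorityQueue<>(Comparator.comparingInt((Route r) -> r.partitions));
        lightestFirst.add(new Route("route-0", 12));
        lightestFirst.add(new Route("route-1", 4));
        lightestFirst.add(new Route("route-2", 8));

        // Move a 3-partition topic onto the lightest route: poll() removes the currently
        // lightest route, we mutate it, then re-add it so the queue re-evaluates its position
        // (mutating an element while it is still queued would corrupt the heap ordering).
        Route target = lightestFirst.poll();
        target.partitions += 3;
        lightestFirst.add(target);

        while (!lightestFirst.isEmpty()) {
            Route r = lightestFirst.poll();
            System.out.println(r.name + ": " + r.partitions);   // route-1: 7, route-2: 8, route-0: 12
        }
    }
}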
From source file: main.java.RMDupper.java

public static void checkForDuplication(DupStats dupStats, OccurenceCounterMerged occurenceCounterMerged,
        SAMFileWriter outputSam, Boolean allReadsAsMerged,
        PriorityQueue<ImmutableTriple<Integer, Integer, SAMRecord>> recordBuffer,
        PriorityQueue<ImmutableTriple<Integer, Integer, SAMRecord>> duplicateBuffer, Set<String> discardSet) {
    // At this point recordBuffer contains all alignments that overlap with its first entry
    // Therefore the task here is to de-duplicate for the first entry in recordBuffer
    duplicateBuffer.clear();

    Iterator<ImmutableTriple<Integer, Integer, SAMRecord>> it = recordBuffer.iterator();
    while (it.hasNext()) {
        ImmutableTriple<Integer, Integer, SAMRecord> maybeDuplicate = it.next();

        if (allReadsAsMerged) {
            if (recordBuffer.peek().left.equals(maybeDuplicate.left)
                    && recordBuffer.peek().middle.equals(maybeDuplicate.middle)) {
                duplicateBuffer.add(maybeDuplicate);
            }
        } else {
            // We build a logic table
            EnumSet<DL> testConditon = EnumSet.noneOf(DL.class);
            if (recordBuffer.peek().right.getReadName().startsWith("M_")) {
                testConditon.add(DL.buffer_read_merged);
            } else if (recordBuffer.peek().right.getReadName().startsWith("F_")) {
                testConditon.add(DL.buffer_read_one);
            } else if (recordBuffer.peek().right.getReadName().startsWith("R_")) {
                testConditon.add(DL.buffer_read_two);
            } else {
                throw new RuntimeException("Unlabelled read '" + recordBuffer.peek().right.getReadName()
                        + "' read name must start with one of M_, F_, R_ when not treating all reads as merged");
            }

            if (maybeDuplicate.right.getReadName().startsWith("M_")) {
                testConditon.add(DL.maybed_read_merged);
            } else if (maybeDuplicate.right.getReadName().startsWith("F_")) {
                testConditon.add(DL.maybed_read_one);
            } else if (maybeDuplicate.right.getReadName().startsWith("R_")) {
                testConditon.add(DL.maybed_read_two);
            } else {
                System.err.println("Unlabelled read '" + maybeDuplicate.right.getReadName()
                        + "' read name must start with one of M_, F_, R_ when not treating all reads as merged");
            }

            if (recordBuffer.peek().left.equals(maybeDuplicate.left)) {
                testConditon.add(DL.equal_alignment_start);
            }
            if (recordBuffer.peek().middle.equals(maybeDuplicate.middle)) {
                testConditon.add(DL.equal_alignment_end);
            }

            boolean duplicateIsShorterOrEqual = maybeDuplicate.middle - maybeDuplicate.left
                    <= recordBuffer.peek().middle - recordBuffer.peek().left;
            boolean duplicateIsLongerOrEqual = recordBuffer.peek().middle - recordBuffer.peek().left
                    <= maybeDuplicate.middle - maybeDuplicate.left;

            if (duplicateIsShorterOrEqual) {
                testConditon.add(DL.maybed_shorter_or_equal);
            }
            if (duplicateIsLongerOrEqual) {
                testConditon.add(DL.maybed_longer_or_equal);
            }

            if (recordBuffer.peek().right.getReadNegativeStrandFlag()) {
                testConditon.add(DL.buffer_reverse_strand);
            } else {
                testConditon.add(DL.buffer_forward_strand);
            }
            if (maybeDuplicate.right.getReadNegativeStrandFlag()) {
                testConditon.add(DL.maybed_reverse_strand);
            } else {
                testConditon.add(DL.maybed_forward_strand);
            }

            //System.out.println("Testing for duplication: "+testConditon);
            //System.out.println(recordBuffer.peek().right.getReadName()+"\t"+recordBuffer.peek().right.getAlignmentStart()+"\t"+recordBuffer.peek().right.getAlignmentEnd());
            //System.out.println(maybeDuplicate.right.getReadName()+"\t"+maybeDuplicate.right.getAlignmentStart()+"\t"+maybeDuplicate.right.getAlignmentEnd());

            //for ( EnumSet<DL> match : duplicateConditionSet.stream().filter(dc -> testConditon.containsAll(dc) ).collect(Collectors.toList()) ) {
            //    System.out.println("Match to: "+match);
            //}
            //for ( EnumSet<DL> match : duplicateConditionSet.stream().collect(Collectors.toList()) ) {
            //    System.out.println("Try to match: "+match);
            //    if ( match.containsAll(testConditon) )
            //    {
            //        System.out.println("success");
            //    }
            //}

            // Test for Duplication
            if (duplicateConditionSet.stream().anyMatch(dc -> testConditon.containsAll(dc))) {
                duplicateBuffer.add(maybeDuplicate);
            }
        }
    }
    //START DEBUG
    /*
    System.out.println ("recordBuffer");

    Comparator<SAMRecord> samRecordComparatorForRecordBuffer = new SAMRecordPositionAndQualityComparator();
    ArrayList<ImmutableTriple<Integer, Integer, SAMRecord>> sortedRecordBuffer = new ArrayList<ImmutableTriple<Integer, Integer, SAMRecord>>(recordBuffer.size());
    Iterator<ImmutableTriple<Integer, Integer, SAMRecord>> rit = recordBuffer.iterator();

    while (rit.hasNext()) {
        sortedRecordBuffer.add(rit.next());
    }
    sortedRecordBuffer.sort(Comparator.comparing(ImmutableTriple<Integer, Integer, SAMRecord>::getRight, samRecordComparatorForRecordBuffer));

    for ( ImmutableTriple<Integer, Integer, SAMRecord> currTriple : sortedRecordBuffer ) {
        System.out.println(" srb: "+(currTriple.right.getReadNegativeStrandFlag()?"-":"+")+" "+currTriple+" "+SAMRecordQualityComparator.getQualityScore(currTriple.right.getBaseQualityString()));
    }

    System.out.println ("duplicateBuffer");
    ArrayList<ImmutableTriple<Integer, Integer, SAMRecord>> sortedDuplicateBuffer = new ArrayList<ImmutableTriple<Integer, Integer, SAMRecord>>(duplicateBuffer.size());
    Iterator<ImmutableTriple<Integer, Integer, SAMRecord>> dit = duplicateBuffer.iterator();
    while (dit.hasNext()) {
        sortedDuplicateBuffer.add(dit.next());
    }
    sortedDuplicateBuffer.sort(Comparator.comparing(ImmutableTriple<Integer, Integer, SAMRecord>::getMiddle));

    for ( ImmutableTriple<Integer, Integer, SAMRecord> currTriple : sortedDuplicateBuffer ) {
        System.out.println(" dbe: "+(currTriple.right.getReadNegativeStrandFlag()?"-":"+")+" "+currTriple+" "+SAMRecordQualityComparator.getQualityScore(currTriple.right.getBaseQualityString()));
    }

    // Sort again with priority queue order
    sortedDuplicateBuffer.sort(Comparator.comparing(ImmutableTriple<Integer, Integer, SAMRecord>::getRight, samRecordComparator.reversed()));
    for ( ImmutableTriple<Integer, Integer, SAMRecord> currTriple : sortedDuplicateBuffer ) {
        System.out.println("sdbe: "+(currTriple.right.getReadNegativeStrandFlag()?"-":"+")+" "+currTriple+" "+SAMRecordQualityComparator.getQualityScore(currTriple.right.getBaseQualityString()));
    }
    */
    //END DEBUG

    if (!duplicateBuffer.isEmpty() && !discardSet.contains(duplicateBuffer.peek().right.getReadName())) {
        //System.out.println("WRITE "+duplicateBuffer.peek());
        decrementDuplicateStats(dupStats, allReadsAsMerged, duplicateBuffer.peek().right.getReadName());
        occurenceCounterMerged.putValue(Long
                .valueOf(duplicateBuffer.stream()
                        .filter(d -> allReadsAsMerged || d.right.getReadName().startsWith("M_")).count())
                .intValue() - 1);
        outputSam.addAlignment(duplicateBuffer.peek().right);
    }
    while (!duplicateBuffer.isEmpty()) {
        discardSet.add(duplicateBuffer.poll().right.getReadName());
    }
    // Maintain the invariant that the first item in recordBuffer may have duplicates
    while (!recordBuffer.isEmpty() && discardSet.contains(recordBuffer.peek().right.getReadName())) {
        String duplicateReadName = recordBuffer.poll().right.getReadName();
        incrementDuplicateStats(dupStats, allReadsAsMerged, duplicateReadName);
        discardSet.remove(duplicateReadName);
    }
}
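The duplicateBuffer above is a quality-ordered PriorityQueue: peek() picks the best read to write out, and poll() then drains every duplicate into the discard set so the record buffer can advance past them. A small, self-contained sketch of that keep-one / discard-the-rest pattern follows, with an invented Read type standing in for SAMRecord.

import java.util.Comparator;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;

public class DeduplicationSketch {
    // Invented read type; name and quality are made up for illustration.
    static class Read {
        final String name;
        final int quality;
        Read(String name, int quality) { this.name = name; this.quality = quality; }
    }

    public static void main(String[] args) {
        // Highest quality first, so peek() sees the representative to keep.
        PriorityQueue<Read> duplicateBuffer =
                new PriorityQueue<>(Comparator.comparingInt((Read r) -> r.quality).reversed());
        duplicateBuffer.add(new Read("M_read1", 30));
        duplicateBuffer.add(new Read("M_read2", 42));
        duplicateBuffer.add(new Read("M_read3", 35));

        Set<String> discardSet = new HashSet<>();
        // Keep the best duplicate (the queue's head) ...
        System.out.println("keep: " + duplicateBuffer.peek().name);   // M_read2
        // ... then drain the queue with poll(), marking every member as seen,
        // much as the excerpt does before advancing its recordBuffer.
        while (!duplicateBuffer.isEmpty()) {
            discardSet.add(duplicateBuffer.poll().name);
        }
        System.out.println("seen (includes the kept read's name): " + discardSet);
    }
}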