List of usage examples for java.util.PriorityQueue.peek()
public E peek()
Retrieves, but does not remove, the head of this queue, or returns null if this queue is empty.
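Before the project examples below, a minimal, self-contained sketch of the peek() contract: the head of the queue (the least element under natural ordering, or under the supplied Comparator) is returned without being removed, and an empty queue yields null rather than an exception. The class name PeekDemo and the sample values are ours, not taken from any of the projects quoted below.

import java.util.Comparator;
import java.util.PriorityQueue;

public class PeekDemo {
    public static void main(String[] args) {
        // Natural ordering: the head is the smallest element.
        PriorityQueue<Integer> numbers = new PriorityQueue<>();
        numbers.offer(42);
        numbers.offer(7);
        numbers.offer(19);
        System.out.println(numbers.peek()); // 7 -- the head is inspected, not removed
        System.out.println(numbers.size()); // still 3

        // With a reversed comparator the queue behaves as a max-heap.
        PriorityQueue<Integer> maxHeap = new PriorityQueue<>(Comparator.reverseOrder());
        maxHeap.offer(42);
        maxHeap.offer(7);
        System.out.println(maxHeap.peek()); // 42

        // On an empty queue, peek() returns null instead of throwing (unlike element()).
        PriorityQueue<String> empty = new PriorityQueue<>();
        System.out.println(empty.peek()); // null
    }
}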
From source file: org.mule.util.store.MonitoredObjectStoreWrapper.java

public void expire() {
    try {
        final long now = System.nanoTime();
        List<Serializable> keys = allKeys();
        int excess = (allKeys().size() - maxEntries);
        if (maxEntries > 0 && excess > 0) {
            PriorityQueue<StoredObject<T>> q = new PriorityQueue<StoredObject<T>>(excess,
                    new Comparator<StoredObject<T>>() {
                        @Override
                        public int compare(StoredObject<T> paramT1, StoredObject<T> paramT2) {
                            return paramT2.timestamp.compareTo(paramT1.timestamp);
                        }
                    });
            long youngest = Long.MAX_VALUE;
            for (Serializable key : keys) {
                StoredObject<T> obj = getStore().retrieve(key);
                // TODO extract the entryTTL>0 outside of loop
                if (entryTTL > 0 && TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                    remove(key);
                    excess--;
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                } else {
                    if (excess > 0 && (q.size() < excess || obj.timestamp < youngest)) {
                        q.offer(obj);
                        youngest = q.peek().timestamp;
                    }
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                }
            }
            for (int i = 0; i < excess; i++) {
                Serializable key = q.poll().key;
                remove(key);
            }
        } else {
            if (entryTTL > 0) {
                for (Serializable key : keys) {
                    StoredObject<T> obj = getStore().retrieve(key);
                    if (TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                        remove(key);
                    }
                }
            }
        }
    } catch (Exception e) {
        logger.warn("Running expirty on " + baseStore + " threw " + e + ":" + e.getMessage());
    }
}
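The expire() method above keeps a bounded heap of eviction candidates and reads the boundary entry with peek() instead of re-sorting the whole store on every pass. The same idea in isolation — keep the k oldest items, with a reversed comparator so the heap root returned by peek() is the newest retained candidate — is sketched below using our own hypothetical Entry record; none of these names come from Mule.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class OldestKSketch {
    record Entry(String key, long timestamp) {}

    /** Returns the k entries with the smallest timestamps. */
    static List<Entry> oldest(List<Entry> entries, int k) {
        // Reversed order: the heap root is the *newest* of the kept candidates,
        // so peek() exposes the cut-off an incoming entry has to beat.
        PriorityQueue<Entry> candidates =
                new PriorityQueue<>(k, Comparator.comparingLong(Entry::timestamp).reversed());
        for (Entry e : entries) {
            if (candidates.size() < k) {
                candidates.offer(e);
            } else if (e.timestamp() < candidates.peek().timestamp()) {
                candidates.poll();   // drop the newest retained candidate
                candidates.offer(e); // keep the older entry instead
            }
        }
        return new ArrayList<>(candidates);
    }

    public static void main(String[] args) {
        List<Entry> entries = List.of(new Entry("a", 30), new Entry("b", 10),
                new Entry("c", 50), new Entry("d", 20));
        System.out.println(oldest(entries, 2)); // the two oldest entries: b (10) and d (20)
    }
}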
From source file: com.navercorp.pinpoint.web.service.AgentEventServiceImpl.java

private List<AgentEvent> createAgentEvents(List<AgentEventBo> agentEventBos) {
    if (CollectionUtils.isEmpty(agentEventBos)) {
        return Collections.emptyList();
    }
    List<AgentEvent> agentEvents = new ArrayList<>(agentEventBos.size());
    PriorityQueue<DurationalAgentEvent> durationalAgentEvents = new PriorityQueue<>(agentEventBos.size(),
            AgentEvent.EVENT_TIMESTAMP_ASC_COMPARATOR);
    for (AgentEventBo agentEventBo : agentEventBos) {
        if (agentEventBo.getEventType().isCategorizedAs(AgentEventTypeCategory.DURATIONAL)) {
            durationalAgentEvents.add(createDurationalAgentEvent(agentEventBo, false));
        } else {
            boolean hasMessage = !ArrayUtils.isEmpty(agentEventBo.getEventBody());
            agentEvents.add(createAgentEvent(agentEventBo, hasMessage));
        }
    }
    long durationStartTimestamp = DurationalAgentEvent.UNKNOWN_TIMESTAMP;
    while (!durationalAgentEvents.isEmpty()) {
        DurationalAgentEvent currentEvent = durationalAgentEvents.remove();
        if (durationStartTimestamp == DurationalAgentEvent.UNKNOWN_TIMESTAMP) {
            durationStartTimestamp = currentEvent.getEventTimestamp();
        }
        currentEvent.setDurationStartTimestamp(durationStartTimestamp);
        DurationalAgentEvent nextEvent = durationalAgentEvents.peek();
        if (nextEvent != null) {
            long nextEventTimestamp = nextEvent.getEventTimestamp();
            currentEvent.setDurationEndTimestamp(nextEventTimestamp);
            durationStartTimestamp = nextEventTimestamp;
        }
        agentEvents.add(currentEvent);
    }
    return agentEvents;
}
From source file: org.apache.storm.daemon.logviewer.utils.DirectoryCleaner.java

/**
 * If totalSize of files exceeds the either the per-worker quota or global quota,
 * Logviewer deletes oldest inactive log files in a worker directory or in all worker dirs.
 * We use the parameter forPerDir to switch between the two deletion modes.
 *
 * @param dirs the list of directories to be scanned for deletion
 * @param quota the per-dir quota or the total quota for the all directories
 * @param forPerDir if true, deletion happens for a single dir; otherwise, for all directories globally
 * @param activeDirs only for global deletion, we want to skip the active logs in activeDirs
 * @return number of files deleted
 */
public DeletionMeta deleteOldestWhileTooLarge(List<Path> dirs, long quota, boolean forPerDir,
        Set<Path> activeDirs) throws IOException {
    long totalSize = 0;
    for (Path dir : dirs) {
        try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
            for (Path path : stream) {
                totalSize += Files.size(path);
            }
        }
    }
    LOG.debug("totalSize: {} quota: {}", totalSize, quota);
    long toDeleteSize = totalSize - quota;
    if (toDeleteSize <= 0) {
        return DeletionMeta.EMPTY;
    }
    int deletedFiles = 0;
    long deletedSize = 0;
    // the oldest pq_size files in this directory will be placed in PQ, with the newest at the root
    PriorityQueue<Pair<Path, FileTime>> pq = new PriorityQueue<>(PQ_SIZE,
            Comparator.comparing((Pair<Path, FileTime> p) -> p.getRight()).reversed());
    int round = 0;
    final Set<Path> excluded = new HashSet<>();
    while (toDeleteSize > 0) {
        LOG.debug("To delete size is {}, start a new round of deletion, round: {}", toDeleteSize, round);
        for (Path dir : dirs) {
            try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
                for (Path path : stream) {
                    if (!excluded.contains(path)) {
                        if (isFileEligibleToSkipDelete(forPerDir, activeDirs, dir, path)) {
                            excluded.add(path);
                        } else {
                            Pair<Path, FileTime> p = Pair.of(path, Files.getLastModifiedTime(path));
                            if (pq.size() < PQ_SIZE) {
                                pq.offer(p);
                            } else if (p.getRight().toMillis() < pq.peek().getRight().toMillis()) {
                                pq.poll();
                                pq.offer(p);
                            }
                        }
                    }
                }
            }
        }
        if (!pq.isEmpty()) {
            // need to reverse the order of elements in PQ to delete files from oldest to newest
            Stack<Pair<Path, FileTime>> stack = new Stack<>();
            while (!pq.isEmpty()) {
                stack.push(pq.poll());
            }
            while (!stack.isEmpty() && toDeleteSize > 0) {
                Pair<Path, FileTime> pair = stack.pop();
                Path file = pair.getLeft();
                final String canonicalPath = file.toAbsolutePath().normalize().toString();
                final long fileSize = Files.size(file);
                final long lastModified = pair.getRight().toMillis();
                //Original implementation doesn't actually check if delete succeeded or not.
                try {
                    Utils.forceDelete(file.toString());
                    LOG.info("Delete file: {}, size: {}, lastModified: {}", canonicalPath, fileSize, lastModified);
                    toDeleteSize -= fileSize;
                    deletedSize += fileSize;
                    deletedFiles++;
                } catch (IOException e) {
                    excluded.add(file);
                }
            }
            pq.clear();
            round++;
            if (round >= MAX_ROUNDS) {
                if (forPerDir) {
                    LOG.warn("Reach the MAX_ROUNDS: {} during per-dir deletion, you may have too many files in "
                            + "a single directory : {}, will delete the rest files in next interval.",
                            MAX_ROUNDS, dirs.get(0).toAbsolutePath().normalize());
                } else {
                    LOG.warn("Reach the MAX_ROUNDS: {} during global deletion, you may have too many files, "
                            + "will delete the rest files in next interval.", MAX_ROUNDS);
                }
                break;
            }
        } else {
            LOG.warn("No more files able to delete this round, but {} is over quota by {} MB",
                    forPerDir ? "this directory" : "root directory", toDeleteSize * 1e-6);
        }
    }
    return new DeletionMeta(deletedSize, deletedFiles);
}
From source file: com.mentor.questa.vrm.jenkins.QuestaVrmHostAction.java

private CategoryDataset buildDataSet(StaplerRequest req) {
    boolean showAction = Boolean.valueOf(req.getParameter("showActions")) || getActionCookie(req);
    DataSetBuilder<String, Long> dsb = new DataSetBuilder<String, Long>();
    PriorityQueue<Pair> pq = new PriorityQueue<Pair>();
    HashMap<String, Integer> hostCount = new HashMap<String, Integer>();
    for (TestResult temp : getRegressionResult().getActions()) {
        QuestaVrmAbstractResult action = (QuestaVrmAbstractResult) temp;
        if (showAction || action instanceof QuestaVrmTestResult) {
            if (action.getStartTime() == -1 || action.getDoneTime() == -1) {
                continue;
            }
            pq.add(new Pair(action.getStartTimeDate(), action.getHost(), 1));
            pq.add(new Pair(action.getDoneTimeDate(), action.getHost(), -1));
            hostCount.put(action.getHost(), 0);
        }
    }
    if (pq.isEmpty()) {
        return dsb.build();
    }
    long offset = getRegressionResult().getRegressionBegin().getTime();
    int noOfTests;
    HashSet<String> visited = new HashSet<String>();
    while (!pq.isEmpty()) {
        long currentKey = pq.peek().date.getTime();
        while (!pq.isEmpty() && pq.peek().date.getTime() == currentKey) {
            Pair current = pq.peek();
            noOfTests = hostCount.get(current.host);
            while (!pq.isEmpty() && pq.peek().compareTo(current) == 0) {
                noOfTests += pq.poll().diff;
            }
            dsb.add(noOfTests, current.host, (current.date.getTime() - offset) / 1000);
            hostCount.put(current.host, noOfTests);
            visited.add(current.host);
        }
        for (String host : hostCount.keySet()) {
            if (!visited.contains(host)) {
                dsb.add(hostCount.get(host), host, (currentKey - offset) / 1000);
            }
        }
        visited.clear();
    }
    return dsb.build();
}
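The chart builder above uses peek() as a sweep-line cursor: it reads the next timestamp without consuming it, then drains every event that shares that timestamp before closing the group. A stripped-down version of just that grouping step, with a hypothetical Event record standing in for the plugin's Pair class:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class GroupByTimestampSketch {
    record Event(long timestamp, String host) {}

    /** Drains the queue into batches of events that share the same timestamp. */
    static List<List<Event>> groupByTimestamp(PriorityQueue<Event> pq) {
        List<List<Event>> groups = new ArrayList<>();
        while (!pq.isEmpty()) {
            long current = pq.peek().timestamp(); // look at the next timestamp without consuming it
            List<Event> group = new ArrayList<>();
            while (!pq.isEmpty() && pq.peek().timestamp() == current) {
                group.add(pq.poll());
            }
            groups.add(group);
        }
        return groups;
    }

    public static void main(String[] args) {
        PriorityQueue<Event> pq = new PriorityQueue<>(Comparator.comparingLong(Event::timestamp));
        pq.offer(new Event(1, "hostA"));
        pq.offer(new Event(1, "hostB"));
        pq.offer(new Event(2, "hostA"));
        System.out.println(groupByTimestamp(pq)); // the two t=1 events grouped together, then the t=2 event
    }
}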
From source file: com.linkedin.pinot.routing.builder.GeneratorBasedRoutingTableBuilder.java

@Override
public List<ServerToSegmentSetMap> computeRoutingTableFromExternalView(String tableName,
        ExternalView externalView, List<InstanceConfig> instanceConfigList) {
    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a replica with fewer segments.
    //
    // Then, we pick a random replica for each segment, iterating from fewest replicas to most replicas, inversely
    // weighted by the number of segments already assigned to that replica. This ensures that we build a routing table
    // that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //   a) Pick TARGET_SERVER_COUNT_PER_QUERY distinct servers
    //   b) Check if the server set covers all the segments; if not, add additional servers until it does.
    //   c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //   d) For each segment, pick a random replica with proper weighting
    //   e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.

    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigList);
    PriorityQueue<Pair<Map<String, Set<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, Set<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, Set<String>>, Float> left,
                        Pair<Map<String, Set<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap,
                    // so we need to return the negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });
    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }
    // Generate routing more tables and keep the ROUTING_TABLE_COUNT top ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, Set<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(routingTableGenerator);
        Pair<Map<String, Set<String>>, Float> worstRoutingTable = topRoutingTables.peek();
        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }
    // Return the best routing tables
    List<ServerToSegmentSetMap> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        Pair<Map<String, Set<String>>, Float> routingTableWithMetric = topRoutingTables.poll();
        routingTables.add(new ServerToSegmentSetMap(routingTableWithMetric.getKey()));
    }
    return routingTables;
}
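Both Pinot builders (this one and the variant that follows) rely on the same selection idiom: keep the best ROUTING_TABLE_COUNT candidates in a max-heap keyed by their metric, let peek() expose the worst candidate currently retained, and replace it only when a newly generated candidate scores better. A distilled sketch of that idiom, using a hypothetical Candidate record rather than Pinot's pair-of-map-and-metric:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class KeepBestSketch {
    record Candidate(String name, double metric) {} // lower metric == better

    static List<Candidate> keepBest(List<Candidate> generated, int keep) {
        // Max-heap on the metric: the root (peek) is always the *worst* candidate retained so far.
        PriorityQueue<Candidate> best =
                new PriorityQueue<>(keep, Comparator.comparingDouble(Candidate::metric).reversed());
        for (Candidate c : generated) {
            if (best.size() < keep) {
                best.offer(c);
            } else if (c.metric() < best.peek().metric()) {
                best.poll();    // evict the worst retained candidate
                best.offer(c);  // admit the better newcomer
            }
        }
        return new ArrayList<>(best);
    }

    public static void main(String[] args) {
        List<Candidate> candidates = List.of(new Candidate("t1", 0.9),
                new Candidate("t2", 0.2), new Candidate("t3", 0.5), new Candidate("t4", 0.1));
        System.out.println(keepBest(candidates, 2)); // t4 and t2, the two lowest metrics
    }
}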
From source file: com.linkedin.pinot.broker.routing.builder.GeneratorBasedRoutingTableBuilder.java

@Override
public void computeRoutingTableFromExternalView(String tableName, ExternalView externalView,
        List<InstanceConfig> instanceConfigs) {
    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a server with fewer segments.
    //
    // Then, we pick a server with least segments already assigned for each segment. This ensures that we build a
    // routing table that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //   a) Pick _targetNumServersPerQuery distinct servers
    //   b) Check if the server set covers all the segments; if not, add additional servers until it does
    //   c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //   d) For each segment, pick a server with least segments already assigned
    //   e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.

    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigs);
    PriorityQueue<Pair<Map<String, List<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, List<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, List<String>>, Float> left,
                        Pair<Map<String, List<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap,
                    // so we need to return the negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });
    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }
    // Generate routing more tables and keep the ROUTING_TABLE_COUNT top ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, List<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(routingTableGenerator);
        Pair<Map<String, List<String>>, Float> worstRoutingTable = topRoutingTables.peek();
        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }
    // Return the best routing tables
    List<Map<String, List<String>>> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        routingTables.add(topRoutingTables.poll().getKey());
    }
    setRoutingTables(routingTables);
}
From source file: com.joliciel.talismane.parser.TransitionBasedParserImpl.java

@Override
public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences) {
    MONITOR.startTask("parseSentence");
    try {
        long startTime = (new Date()).getTime();
        int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000;
        int minFreeMemoryBytes = minFreeMemory * KILOBYTE;
        TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence();
        TreeMap<Integer, PriorityQueue<ParseConfiguration>> heaps = new TreeMap<Integer, PriorityQueue<ParseConfiguration>>();
        PriorityQueue<ParseConfiguration> heap0 = new PriorityQueue<ParseConfiguration>();
        for (PosTagSequence posTagSequence : posTagSequences) {
            // add an initial ParseConfiguration for each postag sequence
            ParseConfiguration initialConfiguration = this.getParserServiceInternal()
                    .getInitialConfiguration(posTagSequence);
            initialConfiguration.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(initialConfiguration);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding initial posTagSequence: " + posTagSequence);
            }
        }
        heaps.put(0, heap0);
        PriorityQueue<ParseConfiguration> backupHeap = null;
        PriorityQueue<ParseConfiguration> finalHeap = null;
        PriorityQueue<ParseConfiguration> terminalHeap = new PriorityQueue<ParseConfiguration>();
        while (heaps.size() > 0) {
            Entry<Integer, PriorityQueue<ParseConfiguration>> heapEntry = heaps.pollFirstEntry();
            PriorityQueue<ParseConfiguration> currentHeap = heapEntry.getValue();
            int currentHeapIndex = heapEntry.getKey();
            if (LOG.isTraceEnabled()) {
                LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size());
            }
            boolean finished = false;
            // systematically set the final heap here, just in case we exit "naturally" with no more heaps
            finalHeap = heapEntry.getValue();
            backupHeap = new PriorityQueue<ParseConfiguration>();
            // we jump out when either (a) all tokens have been attached or (b) we go over the max alloted time
            ParseConfiguration topConf = currentHeap.peek();
            if (topConf.isTerminal()) {
                LOG.trace("Exiting with terminal heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size());
                finished = true;
            }
            if (earlyStop && terminalHeap.size() >= beamWidth) {
                LOG.debug("Early stop activated and terminal heap contains " + beamWidth + " entries. Exiting.");
                finalHeap = terminalHeap;
                finished = true;
            }
            long analysisTime = (new Date()).getTime() - startTime;
            if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) {
                LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText());
                LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds.");
                finished = true;
            }
            if (minFreeMemory > 0) {
                long freeMemory = Runtime.getRuntime().freeMemory();
                if (freeMemory < minFreeMemoryBytes) {
                    LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText());
                    LOG.info("Min free memory (bytes):" + minFreeMemoryBytes);
                    LOG.info("Current free memory (bytes): " + freeMemory);
                    finished = true;
                }
            }
            if (finished) {
                break;
            }
            // limit the breadth to K
            int maxSequences = currentHeap.size() > this.beamWidth ? this.beamWidth : currentHeap.size();
            int j = 0;
            while (currentHeap.size() > 0) {
                ParseConfiguration history = currentHeap.poll();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":");
                    LOG.trace(history.toString());
                    LOG.trace("Score: " + df.format(history.getScore()));
                    LOG.trace(history.getPosTagSequence());
                }
                List<Decision<Transition>> decisions = new ArrayList<Decision<Transition>>();
                // test the positive rules on the current configuration
                boolean ruleApplied = false;
                if (parserPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (ParserRule rule : parserPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.toString());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<Transition> positiveRuleDecision = TalismaneSession
                                        .getTransitionSystem().createDefaultDecision(rule.getTransition());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting transition to: " + rule.getTransition().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }
                if (!ruleApplied) {
                    // test the features on the current configuration
                    List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("feature analyse");
                    try {
                        for (ParseConfigurationFeature<?> feature : this.parseFeatures) {
                            MONITOR.startTask(feature.getName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(history, env);
                                if (featureResult != null)
                                    parseFeatureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(feature.getName());
                            }
                        }
                        if (LOG_FEATURES.isTraceEnabled()) {
                            for (FeatureResult<?> featureResult : parseFeatureResults) {
                                LOG_FEATURES.trace(featureResult.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("feature analyse");
                    }
                    // evaluate the feature results using the decision maker
                    MONITOR.startTask("make decision");
                    try {
                        decisions = this.decisionMaker.decide(parseFeatureResults);
                        for (ClassificationObserver<Transition> observer : this.observers) {
                            observer.onAnalyse(history, parseFeatureResults, decisions);
                        }
                        List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>(decisions.size());
                        for (Decision<Transition> decision : decisions) {
                            if (decision.getProbability() > MIN_PROB_TO_STORE)
                                decisionShortList.add(decision);
                        }
                        decisions = decisionShortList;
                    } finally {
                        MONITOR.endTask("make decision");
                    }
                    // apply the negative rules
                    Set<Transition> eliminatedTransitions = new HashSet<Transition>();
                    if (parserNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (ParserRule rule : parserNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.toString());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedTransitions.addAll(rule.getTransitions());
                                    if (LOG.isTraceEnabled()) {
                                        for (Transition eliminatedTransition : rule.getTransitions())
                                            LOG.trace("Rule applies. Eliminating transition: " + eliminatedTransition.getCode());
                                    }
                                }
                            }
                            if (eliminatedTransitions.size() > 0) {
                                List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>();
                                for (Decision<Transition> decision : decisions) {
                                    if (!eliminatedTransitions.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }
                } // has a positive rule been applied?
                boolean transitionApplied = false;
                // add new configuration to the heap, one for each valid transition
                MONITOR.startTask("heap sort");
                try {
                    // Why apply all decisions here? Why not just the top N (where N = beamwidth)?
                    // Answer: because we're not always adding solutions to the same heap
                    // And yet: a decision here can only do one of two things: process a token (heap+1000),
                    // or add a non-processing transition (heap+1)
                    // So, if we've already applied N decisions of each type, we should be able to stop
                    for (Decision<Transition> decision : decisions) {
                        Transition transition = decision.getOutcome();
                        if (LOG.isTraceEnabled())
                            LOG.trace("Outcome: " + transition.getCode() + ", " + decision.getProbability());
                        if (transition.checkPreconditions(history)) {
                            transitionApplied = true;
                            ParseConfiguration configuration = this.parserServiceInternal.getConfiguration(history);
                            if (decision.isStatistical())
                                configuration.addDecision(decision);
                            transition.apply(configuration);
                            int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration) * 1000;
                            if (configuration.isTerminal()) {
                                nextHeapIndex = Integer.MAX_VALUE;
                            } else {
                                while (nextHeapIndex <= currentHeapIndex)
                                    nextHeapIndex++;
                            }
                            PriorityQueue<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex);
                            if (nextHeap == null) {
                                if (configuration.isTerminal())
                                    nextHeap = terminalHeap;
                                else
                                    nextHeap = new PriorityQueue<ParseConfiguration>();
                                heaps.put(nextHeapIndex, nextHeap);
                                if (LOG.isTraceEnabled())
                                    LOG.trace("Created heap with index: " + nextHeapIndex);
                            }
                            nextHeap.add(configuration);
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Added configuration with score " + configuration.getScore() + " to heap: "
                                        + nextHeapIndex + ", total size: " + nextHeap.size());
                            }
                            configuration.clearMemory();
                        } else {
                            if (LOG.isTraceEnabled())
                                LOG.trace("Cannot apply transition: doesn't meet pre-conditions");
                            // just in case the we run out of both heaps and analyses, we build this backup heap
                            backupHeap.add(history);
                        } // does transition meet pre-conditions?
                    } // next transition
                } finally {
                    MONITOR.endTask("heap sort");
                }
                if (transitionApplied) {
                    j++;
                } else {
                    LOG.trace("No transitions could be applied: not counting this history as part of the beam");
                }
                // beam width test
                if (j == maxSequences)
                    break;
            } // next history
        } // next atomic index
        // return the best sequences on the heap
        List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>();
        int i = 0;
        if (finalHeap.isEmpty())
            finalHeap = backupHeap;
        while (!finalHeap.isEmpty()) {
            bestConfigurations.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }
        if (LOG.isDebugEnabled()) {
            for (ParseConfiguration finalConfiguration : bestConfigurations) {
                LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString());
                LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence());
                LOG.debug("Transitions: " + finalConfiguration.getTransitions());
                LOG.debug("Decisions: " + finalConfiguration.getDecisions());
                if (LOG.isTraceEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    for (Decision<Transition> decision : finalConfiguration.getDecisions()) {
                        sb.append(" * ");
                        sb.append(df.format(decision.getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getTransitions().size());
                    LOG.trace(sb.toString());
                    sb = new StringBuilder();
                    sb.append(" * PosTag sequence score ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getScore()));
                    sb.append(" = ");
                    for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) {
                        sb.append(" * ");
                        sb.append(df.format(posTaggedToken.getDecision().getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getPosTagSequence().size());
                    LOG.trace(sb.toString());
                    sb = new StringBuilder();
                    sb.append(" * Token sequence score = ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore()));
                    LOG.trace(sb.toString());
                }
            }
        }
        return bestConfigurations;
    } finally {
        MONITOR.endTask("parseSentence");
    }
}
From source file: org.apache.drill.exec.store.mongo.MongoGroupScan.java

@Override
public void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException {
    logger.debug("Incoming endpoints :" + endpoints);
    watch.reset();
    watch.start();
    final int numSlots = endpoints.size();
    int totalAssignmentsTobeDone = chunksMapping.size();
    Preconditions.checkArgument(numSlots <= totalAssignmentsTobeDone,
            String.format("Incoming endpoints %d is greater than number of chunks %d", numSlots,
                    totalAssignmentsTobeDone));
    final int minPerEndpointSlot = (int) Math.floor((double) totalAssignmentsTobeDone / numSlots);
    final int maxPerEndpointSlot = (int) Math.ceil((double) totalAssignmentsTobeDone / numSlots);
    endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
    Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
    for (int i = 0; i < numSlots; ++i) {
        endpointFragmentMapping.put(i, new ArrayList<MongoSubScanSpec>(maxPerEndpointSlot));
        String hostname = endpoints.get(i).getAddress();
        Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
        if (hostIndexQueue == null) {
            hostIndexQueue = Lists.newLinkedList();
            endpointHostIndexListMap.put(hostname, hostIndexQueue);
        }
        hostIndexQueue.add(i);
    }
    Set<Entry<String, List<ChunkInfo>>> chunksToAssignSet = Sets.newHashSet(chunksInverseMapping.entrySet());
    for (Iterator<Entry<String, List<ChunkInfo>>> chunksIterator = chunksToAssignSet.iterator();
            chunksIterator.hasNext();) {
        Entry<String, List<ChunkInfo>> chunkEntry = chunksIterator.next();
        Queue<Integer> slots = endpointHostIndexListMap.get(chunkEntry.getKey());
        if (slots != null) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                Integer slotIndex = slots.poll();
                List<MongoSubScanSpec> subScanSpecList = endpointFragmentMapping.get(slotIndex);
                subScanSpecList.add(buildSubScanSpecAndGet(chunkInfo));
                slots.offer(slotIndex);
            }
            chunksIterator.remove();
        }
    }
    PriorityQueue<List<MongoSubScanSpec>> minHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots,
            LIST_SIZE_COMPARATOR);
    PriorityQueue<List<MongoSubScanSpec>> maxHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots,
            LIST_SIZE_COMPARATOR_REV);
    for (List<MongoSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
        if (listOfScan.size() < minPerEndpointSlot) {
            minHeap.offer(listOfScan);
        } else if (listOfScan.size() > minPerEndpointSlot) {
            maxHeap.offer(listOfScan);
        }
    }
    if (chunksToAssignSet.size() > 0) {
        for (Entry<String, List<ChunkInfo>> chunkEntry : chunksToAssignSet) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                List<MongoSubScanSpec> smallestList = minHeap.poll();
                smallestList.add(buildSubScanSpecAndGet(chunkInfo));
                minHeap.offer(smallestList);
            }
        }
    }
    while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
        List<MongoSubScanSpec> smallestList = minHeap.poll();
        List<MongoSubScanSpec> largestList = maxHeap.poll();
        smallestList.add(largestList.remove(largestList.size() - 1));
        if (largestList.size() > minPerEndpointSlot) {
            maxHeap.offer(largestList);
        }
        if (smallestList.size() < minPerEndpointSlot) {
            minHeap.offer(smallestList);
        }
    }
    logger.debug("Built assignment map in {} s.\nEndpoints: {}.\nAssignment Map: {}",
            watch.elapsed(TimeUnit.NANOSECONDS) / 1000, endpoints, endpointFragmentMapping.toString());
}
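The assignment code above balances work with two heaps over the same lists — a min-heap and a max-heap ordered by list size — and uses minHeap.peek() to decide whether the most under-loaded slot still needs items before shifting one from the most over-loaded slot. A compact sketch of that rebalancing loop, with plain List<String> buckets in place of MongoSubScanSpec lists (like the original, it assumes an over-full bucket exists whenever an under-full one does):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class RebalanceSketch {
    public static void main(String[] args) {
        List<List<String>> buckets = new ArrayList<>(List.of(
                new ArrayList<>(List.of("c1", "c2", "c3", "c4")),
                new ArrayList<>(List.of("c5")),
                new ArrayList<>(List.of("c6", "c7", "c8"))));
        int minPerBucket = 2;

        Comparator<List<String>> bySize = Comparator.comparingInt(List::size);
        PriorityQueue<List<String>> minHeap = new PriorityQueue<>(bySize);            // smallest bucket at the root
        PriorityQueue<List<String>> maxHeap = new PriorityQueue<>(bySize.reversed()); // largest bucket at the root
        for (List<String> bucket : buckets) {
            if (bucket.size() < minPerBucket) {
                minHeap.offer(bucket);
            } else if (bucket.size() > minPerBucket) {
                maxHeap.offer(bucket);
            }
        }

        // Move one item at a time from the largest to the smallest bucket until
        // the most under-loaded bucket (minHeap.peek()) reaches its minimum share.
        while (minHeap.peek() != null && minHeap.peek().size() < minPerBucket) {
            List<String> smallest = minHeap.poll();
            List<String> largest = maxHeap.poll();
            smallest.add(largest.remove(largest.size() - 1));
            if (largest.size() > minPerBucket) {
                maxHeap.offer(largest);
            }
            if (smallest.size() < minPerBucket) {
                minHeap.offer(smallest);
            }
        }
        System.out.println(buckets); // [[c1, c2, c3], [c5, c4], [c6, c7, c8]]
    }
}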
From source file: org.mskcc.cbio.portal.servlet.NetworkServlet.java

/**
 *
 * @param network
 * @param n
 * @return
 */
private List<Node> getNodesToRemove(final Network network, final double diffusion, final int n) {
    final Map<Node, Double> mapDiffusion = getMapDiffusedTotalAlteredPercentage(network, diffusion);
    // keep track of the top nKeep
    PriorityQueue<Node> topAlteredNodes = new PriorityQueue<Node>(n, new Comparator<Node>() {
        public int compare(Node n1, Node n2) {
            int ret = mapDiffusion.get(n1).compareTo(mapDiffusion.get(n2));
            if (diffusion != 0 && ret == 0) { // if the same diffused perc, use own perc
                ret = Double.compare(getTotalAlteredPercentage(n1), getTotalAlteredPercentage(n2));
            }
            if (ret == 0) { // if the same, rank according to degree
                ret = network.getDegree(n1) - network.getDegree(n2);
            }
            return ret;
        }
    });
    List<Node> nodesToRemove = new ArrayList<Node>();
    for (Node node : network.getNodes()) {
        if (isInQuery(node) || node.getType().equals(NodeType.DRUG)) {
            continue;
        }
        if (topAlteredNodes.size() < n) {
            topAlteredNodes.add(node);
        } else {
            if (n == 0) {
                nodesToRemove.add(node);
            } else {
                if (mapDiffusion.get(node) > mapDiffusion.get(topAlteredNodes.peek())) {
                    nodesToRemove.add(topAlteredNodes.poll());
                    topAlteredNodes.add(node);
                } else {
                    nodesToRemove.add(node);
                }
            }
        }
    }
    return nodesToRemove;
}
From source file: com.uber.stream.kafka.mirrormaker.manager.core.ControllerHelixManager.java

public void scaleCurrentCluster() throws Exception {
    int oldTotalNumWorker = 0;
    int newTotalNumWorker = 0;
    Map<String, Integer> _routeWorkerOverrides = getRouteWorkerOverride();
    for (String pipeline : _pipelineToInstanceMap.keySet()) {
        LOGGER.info("Start rescale pipeline: {}", pipeline);
        PriorityQueue<InstanceTopicPartitionHolder> newItphQueue = new PriorityQueue<>(1,
                InstanceTopicPartitionHolder.totalWorkloadComparator(_pipelineWorkloadMap));
        // TODO: what if routeId is not continuous
        int nextRouteId = _pipelineToInstanceMap.get(pipeline).size();
        for (InstanceTopicPartitionHolder itph : _pipelineToInstanceMap.get(pipeline)) {
            if (itph.getTotalNumPartitions() > _maxNumPartitionsPerRoute) {
                LOGGER.info(
                        "Checking route {} with controller {} and topics {} since it exceeds maxNumPartitionsPerRoute {}",
                        itph.getRouteString(), itph.getInstanceName(), itph.getServingTopicPartitionSet(),
                        _maxNumPartitionsPerRoute);
                while (itph.getTotalNumPartitions() > _maxNumPartitionsPerRoute) {
                    // Only one topic left, do nothing
                    if (itph.getNumServingTopicPartitions() == 1) {
                        LOGGER.info("Only one topic {} in route {}, do nothing",
                                itph.getServingTopicPartitionSet().iterator().next(), itph.getRouteString());
                        break;
                    }
                    // Get the topic with largest number of partitions
                    TopicPartition tpToMove = new TopicPartition("tmp", -1);
                    for (TopicPartition tp : itph.getServingTopicPartitionSet()) {
                        if (tp.getPartition() > tpToMove.getPartition()) {
                            tpToMove = tp;
                        }
                    }
                    // If existing lightest route cannot fit the largest topic to move
                    if (newItphQueue.isEmpty()
                            || newItphQueue.peek().getTotalNumPartitions() + tpToMove.getPartition() > _initMaxNumPartitionsPerRoute) {
                        try {
                            InstanceTopicPartitionHolder newHolder = createNewRoute(pipeline, nextRouteId);
                            _helixAdmin.setResourceIdealState(_helixClusterName, tpToMove.getTopic(),
                                    IdealStateBuilder.resetCustomIdealStateFor(
                                            _helixAdmin.getResourceIdealState(_helixClusterName, tpToMove.getTopic()),
                                            tpToMove.getTopic(), itph.getRouteString(), newHolder.getRouteString(),
                                            newHolder.getInstanceName()));
                            itph.removeTopicPartition(tpToMove);
                            newHolder.addTopicPartition(tpToMove);
                            newItphQueue.add(newHolder);
                            nextRouteId++;
                        } catch (Exception e) {
                            LOGGER.error("Got exception when create a new route when rebalancing, abandon!", e);
                            throw new Exception("Got exception when create a new route when rebalancing, abandon!", e);
                        }
                    } else {
                        InstanceTopicPartitionHolder newHolder = newItphQueue.poll();
                        _helixAdmin.setResourceIdealState(_helixClusterName, tpToMove.getTopic(),
                                IdealStateBuilder.resetCustomIdealStateFor(
                                        _helixAdmin.getResourceIdealState(_helixClusterName, tpToMove.getTopic()),
                                        tpToMove.getTopic(), itph.getRouteString(), newHolder.getRouteString(),
                                        newHolder.getInstanceName()));
                        itph.removeTopicPartition(tpToMove);
                        newHolder.addTopicPartition(tpToMove);
                        newItphQueue.add(newHolder);
                    }
                }
            }
            newItphQueue.add(itph);
        }
        // After moving topics, scale workers based on workload
        int rescaleFailedCount = 0;
        for (InstanceTopicPartitionHolder itph : newItphQueue) {
            oldTotalNumWorker += itph.getWorkerSet().size();
            String routeString = itph.getRouteString();
            int initWorkerCount = _initMaxNumWorkersPerRoute;
            if (_routeWorkerOverrides.containsKey(routeString)
                    && _routeWorkerOverrides.get(routeString) > initWorkerCount) {
                initWorkerCount = _routeWorkerOverrides.get(routeString);
            }
            String hostname = getHostname(itph.getInstanceName());
            try {
                String result = HttpClientUtils.getData(_httpClient, _requestConfig, hostname, _controllerPort,
                        "/admin/workloadinfo");
                ControllerWorkloadInfo workloadInfo = JSONObject.parseObject(result, ControllerWorkloadInfo.class);
                TopicWorkload totalWorkload = workloadInfo.getTopicWorkload();
                if (workloadInfo != null && workloadInfo.getNumOfExpectedWorkers() != 0) {
                    _pipelineWorkloadMap.put(itph.getRouteString(), totalWorkload);
                    int expectedNumWorkers = workloadInfo.getNumOfExpectedWorkers();
                    LOGGER.info("Current {} workers in route {}, expect {} workers",
                            itph.getWorkerSet().size(), itph.getRouteString(), expectedNumWorkers);
                    int actualExpectedNumWorkers = getActualExpectedNumWorkers(expectedNumWorkers, initWorkerCount);
                    LOGGER.info("Current {} workers in route {}, actual expect {} workers",
                            itph.getWorkerSet().size(), itph.getRouteString(), actualExpectedNumWorkers);
                    if (actualExpectedNumWorkers > itph.getWorkerSet().size()) {
                        LOGGER.info("Current {} workers in route {}, actual expect {} workers, add {} workers",
                                itph.getWorkerSet().size(), itph.getRouteString(), actualExpectedNumWorkers,
                                actualExpectedNumWorkers - itph.getWorkerSet().size());
                        // TODO: handle exception
                        _workerHelixManager.addWorkersToMirrorMaker(itph, itph.getRoute().getTopic(),
                                itph.getRoute().getPartition(),
                                actualExpectedNumWorkers - itph.getWorkerSet().size());
                    }
                    if (actualExpectedNumWorkers < itph.getWorkerSet().size()) {
                        LOGGER.info("Current {} workers in route {}, actual expect {} workers, remove {} workers",
                                itph.getWorkerSet().size(), itph.getRouteString(), actualExpectedNumWorkers,
                                itph.getWorkerSet().size() - actualExpectedNumWorkers);
                        // TODO: handle exception
                        _workerHelixManager.removeWorkersToMirrorMaker(itph, itph.getRoute().getTopic(),
                                itph.getRoute().getPartition(),
                                itph.getWorkerSet().size() - actualExpectedNumWorkers);
                    }
                    newTotalNumWorker += actualExpectedNumWorkers;
                } else {
                    LOGGER.warn("Get workload on {} for route: {} returns 0. No change on number of workers",
                            hostname, itph.getRouteString());
                    newTotalNumWorker += itph.getWorkerSet().size();
                    rescaleFailedCount++;
                }
            } catch (Exception e) {
                rescaleFailedCount++;
                LOGGER.error(String.format(
                        "Get workload error when connecting to %s for route %s. No change on number of workers",
                        hostname, itph.getRouteString()), e);
                newTotalNumWorker += itph.getWorkerSet().size();
                rescaleFailedCount++;
            }
        }
        _pipelineToInstanceMap.put(pipeline, newItphQueue);
        _rescaleFailedCount.inc(rescaleFailedCount - _rescaleFailedCount.getCount());
    }
    LOGGER.info("oldTotalNumWorker: {}, newTotalNumWorker: {}", oldTotalNumWorker, newTotalNumWorker);
}