List of usage examples for java.util.PriorityQueue.size()

public int size()

Returns the number of elements in this queue. The examples below show size() in real projects: bounding top-N heaps, limiting beam widths, and guarding work-queue loops. Each example is taken from the source file named above it.
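Before the project examples, a minimal self-contained sketch (class and variable names are illustrative) of what size() reports as elements are offered and polled:

import java.util.PriorityQueue;

public class SizeBasics {
    public static void main(String[] args) {
        PriorityQueue<Integer> queue = new PriorityQueue<>();
        System.out.println(queue.size());   // 0: a new queue is empty

        queue.offer(42);
        queue.offer(7);
        queue.offer(19);
        System.out.println(queue.size());   // 3: size() counts every element, regardless of order

        queue.poll();                        // removes the head (7, the smallest)
        System.out.println(queue.size());   // 2: poll() shrinks the queue by one

        queue.peek();                        // peek() does not remove anything
        System.out.println(queue.size());   // still 2
    }
}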
From source file:edu.snu.leader.hidden.SpatialIndividual.java
/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
public void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<SpatialIndividual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        SpatialIndividual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        Neighbor neighbor = sortedNeighbors.poll();
        _nearestNeighbors.add(neighbor);
        neighbor.getIndividual().signalNearestNeighborStatus(this);
        // _LOG.debug( "Nearest neighbor: id=["
        //         + getID()
        //         + "] neighbor=["
        //         + neighbor.getIndividual().getID()
        //         + "]" );
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}
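The pattern above is worth isolating: every candidate goes into a min-heap keyed by distance, and size() bounds the number of polls so the loop still works when there are fewer candidates than requested neighbors. A stripped-down sketch of that idea, using plain double distances instead of the Neighbor type (all names here are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class NearestNeighborSketch {
    /** Returns the k smallest distances, or fewer if the input is smaller. */
    static List<Double> nearest(List<Double> distances, int k) {
        PriorityQueue<Double> sorted = new PriorityQueue<>(distances);

        // size() guards against asking for more neighbors than exist
        int count = Math.min(sorted.size(), k);
        List<Double> result = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            result.add(sorted.poll());       // poll() always yields the current minimum
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(nearest(List.of(3.5, 0.2, 7.1, 1.4), 3)); // [0.2, 1.4, 3.5]
        System.out.println(nearest(List.of(3.5), 3));                // [3.5]
    }
}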
From source file:org.mskcc.cbio.portal.servlet.NetworkServlet.java
/**
 *
 * @param network
 * @param n
 * @return
 */
private List<Node> getNodesToRemove(final Network network, final double diffusion, final int n) {
    final Map<Node, Double> mapDiffusion = getMapDiffusedTotalAlteredPercentage(network, diffusion);

    // keep track of the top nKeep
    PriorityQueue<Node> topAlteredNodes = new PriorityQueue<Node>(n, new Comparator<Node>() {
        public int compare(Node n1, Node n2) {
            int ret = mapDiffusion.get(n1).compareTo(mapDiffusion.get(n2));
            if (diffusion != 0 && ret == 0) { // if the same diffused perc, use own perc
                ret = Double.compare(getTotalAlteredPercentage(n1), getTotalAlteredPercentage(n2));
            }
            if (ret == 0) { // if the same, rank according to degree
                ret = network.getDegree(n1) - network.getDegree(n2);
            }
            return ret;
        }
    });

    List<Node> nodesToRemove = new ArrayList<Node>();
    for (Node node : network.getNodes()) {
        if (isInQuery(node) || node.getType().equals(NodeType.DRUG)) {
            continue;
        }

        if (topAlteredNodes.size() < n) {
            topAlteredNodes.add(node);
        } else {
            if (n == 0) {
                nodesToRemove.add(node);
            } else {
                if (mapDiffusion.get(node) > mapDiffusion.get(topAlteredNodes.peek())) {
                    nodesToRemove.add(topAlteredNodes.poll());
                    topAlteredNodes.add(node);
                } else {
                    nodesToRemove.add(node);
                }
            }
        }
    }

    return nodesToRemove;
}
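The servlet keeps only the n highest-ranked nodes: while size() is below n every node is added, and once the heap is full each candidate is compared against peek(), the weakest node retained so far. A minimal sketch of this bounded top-N idiom with plain integers (names are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class TopNSketch {
    /** Keeps the n largest values seen; everything else would be "removed". */
    static List<Integer> topN(List<Integer> values, int n) {
        // Natural ordering: peek() is the smallest of the values kept so far.
        // PriorityQueue requires an initial capacity of at least 1, hence Math.max.
        PriorityQueue<Integer> top = new PriorityQueue<>(Math.max(1, n));
        for (int value : values) {
            if (top.size() < n) {
                top.add(value);                      // heap not full yet, keep everything
            } else if (n > 0 && value > top.peek()) {
                top.poll();                          // evict the weakest kept value
                top.add(value);
            }
        }
        return new ArrayList<>(top);                 // heap order, not fully sorted
    }

    public static void main(String[] args) {
        System.out.println(topN(List.of(5, 1, 9, 3, 7, 2), 3)); // contains 5, 7, 9
    }
}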
From source file:org.kuali.rice.krms.framework.engine.TermResolutionEngineImpl.java
/** * * @param termName/* w w w .j a v a2 s .c o m*/ * @return List<{@link TermResolverKey}> */ protected List<TermResolverKey> buildTermResolutionPlan(String termName) { // our result List<TermResolverKey> resolutionPlan = null; // Holds the resolvers we've visited, along with the needed metadata for generating our final plan Map<TermResolverKey, Visited> visitedByKey = new HashMap<TermResolverKey, Visited>(); // this holds a least cost first list of nodes remaining to be explored PriorityQueue<ToVisit> toVisits = new PriorityQueue<ToVisit>(); // nice grammar there cowboy // dummy resolver to be the root of this tree // Do I really need this? Yes, because there may be more than one resolver that resolves to the desired termName, // so this destination unifies the trees of those candidate resolvers TermResolver destination = createDestination(termName); // problem is we can't get this one out of the registry TermResolverKey destinationKey = new TermResolverKey(destination); LOG.debug("Beginning resolution tree search for " + termName); // seed our queue of resolvers to visit // need to be aware of null parent for root ToVisit toVisits.add(new ToVisit(0, destination, null)); // there may not be a viable plan boolean plannedToDestination = false; // We'll do a modified Dijkstra's shortest path algorithm, where at each leaf we see if we've planned out // termName resolution all the way up to the root, our destination. If so, we just reconstruct our plan. while (!plannedToDestination && toVisits.size() > 0) { // visit least cost node remaining ToVisit visiting = toVisits.poll(); LOG.debug("visiting " + visiting.getTermResolverKey()); // the resolver is the edge in our tree -- we don't get it directly from the termResolversByKey Map, because it could be our destination TermResolver resolver = getResolver(visiting.getTermResolverKey(), destination, destinationKey); TermResolver parent = getResolver(visiting.getParentKey(), destination, destinationKey); if (visitedByKey.containsKey(visiting.getTermResolverKey())) { continue; // We've already visited this one } Visited parentVisited = visitedByKey.get(visiting.getParentKey()); if (resolver == null) throw new RuntimeException("Unable to get TermResolver by its key"); Set<String> prereqs = resolver.getPrerequisites(); // keep track of any prereqs that we already have handy List<String> metPrereqs = new LinkedList<String>(); // see what prereqs we have already, and which we'll need to visit if (prereqs != null) for (String prereq : prereqs) { if (!termCache.containsKey(new Term(prereq, null))) { // enqueue all resolvers in toVisits List<TermResolver<?>> prereqResolvers = termResolversByOutput.get(prereq); if (prereqResolvers != null) for (TermResolver prereqResolver : prereqResolvers) { // Only TermResolvers that don't take paramaterized terms can be chained, so: // if the TermResolver doesn't take parameters, or it resolves the output termName if (CollectionUtils.isEmpty(prereqResolver.getParameterNames()) || termName.equals(prereqResolver.getOutput())) { // queue it up for visiting toVisits.add(new ToVisit(visiting.getCost() /* cost to get to this resolver */, prereqResolver, resolver)); } } } else { metPrereqs.add(prereq); } } // Build visited info Visited visited = buildVisited(resolver, parentVisited, metPrereqs); visitedByKey.put(visited.getResolverKey(), visited); plannedToDestination = isPlannedBackToDestination(visited, destinationKey, visitedByKey); } if (plannedToDestination) { // build result from Visited tree. 
resolutionPlan = new LinkedList<TermResolverKey>(); assembleLinearResolutionPlan(visitedByKey.get(destinationKey), visitedByKey, resolutionPlan); } return resolutionPlan; }
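Stripped of the term-resolver machinery, the planner above is a least-cost-first search: a PriorityQueue ordered by accumulated cost is the frontier, size() > 0 is the loop guard, and entries whose key has already been visited are skipped after polling. A generic sketch of that shape with hypothetical node and cost types (this is not the KRMS API):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;

public class LeastCostFirstSketch {
    /** One frontier entry: a node plus the cost of the cheapest known path to it. */
    record ToVisit(String node, int cost) {}

    /** Returns the cheapest known cost to reach each node from start. */
    static Map<String, Integer> cheapestCosts(Map<String, Map<String, Integer>> edges, String start) {
        Map<String, Integer> best = new HashMap<>();
        Set<String> visited = new HashSet<>();
        PriorityQueue<ToVisit> toVisits =
                new PriorityQueue<>((a, b) -> Integer.compare(a.cost(), b.cost()));
        toVisits.add(new ToVisit(start, 0));

        // size() > 0 is the loop guard; !isEmpty() would be equivalent
        while (toVisits.size() > 0) {
            ToVisit visiting = toVisits.poll();             // least-cost entry first
            if (!visited.add(visiting.node())) {
                continue;                                   // already planned, skip
            }
            best.put(visiting.node(), visiting.cost());
            for (Map.Entry<String, Integer> e :
                    edges.getOrDefault(visiting.node(), Map.of()).entrySet()) {
                toVisits.add(new ToVisit(e.getKey(), visiting.cost() + e.getValue()));
            }
        }
        return best;
    }

    public static void main(String[] args) {
        Map<String, Map<String, Integer>> edges = Map.of(
                "a", Map.of("b", 1, "c", 4),
                "b", Map.of("c", 2));
        System.out.println(cheapestCosts(edges, "a")); // a=0, b=1, c=3 (map order may vary)
    }
}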
From source file:$.HyperGraphBuilder$.java
@Override public V_GenericGraph makeGraphResponse(final V_GraphQuery graphQuery) throws Exception { nodeList = new HashMap<String, V_GenericNode>(); // edgeMap = new HashMap<String, V_GenericEdge>(); edgeList = new HashMap<String, V_GenericEdge>(); scannedQueries = new HashSet<String>(); final PriorityQueue<G_EntityQuery> queriesToRun = new PriorityQueue<G_EntityQuery>(10, new ScoreComparator()); Map<String, V_GenericNode> nodesFromPreviousDegree = new HashMap<String, V_GenericNode>(); Map<String, V_GenericEdge> edgesFromPreviousDegree = new HashMap<String, V_GenericEdge>(); if (graphQuery.getMaxHops() <= 0) { return new V_GenericGraph(); } else {/*from w ww. j a v a 2 s . c o m*/ logger.debug("Attempting a graph for query " + graphQuery.toString()); } int intStatus = 0; String strStatus = "Graph Loaded"; final G_PropertyMatchDescriptor identifierList = G_PropertyMatchDescriptor.newBuilder().setKey("_all") .setListRange(new ListRangeHelper(G_PropertyType.STRING, graphQuery.getSearchIds())) .setConstraint(G_Constraint.EQUALS).build(); final QueryHelper qh = new QueryHelper(identifierList); qh.setTargetSchema(index); queriesToRun.add(qh); int currentDegree = 0; for (currentDegree = 0; (currentDegree < graphQuery.getMaxHops()) && (nodeList.size() < graphQuery.getMaxNodes()); currentDegree++) { G_EntityQuery eq = null; logger.debug("${symbol_dollar}${symbol_dollar}${symbol_dollar}${symbol_dollar}There are " + queriesToRun.size() + " queries to run in the current degree."); while ((queriesToRun.size() > 0) && ((eq = queriesToRun.poll()) != null) && (nodeList.size() < graphQuery.getMaxNodes())) { if (ValidationUtils.isValid(eq.getPropertyMatchDescriptors())) { nodesFromPreviousDegree = new HashMap<String, V_GenericNode>(nodeList); edgesFromPreviousDegree = new HashMap<String, V_GenericEdge>(edgeList); logger.debug("Processing degree " + currentDegree); /** * This will end up building nodes and edges, and creating * new queries for the queue */ logger.debug("1111=====Running query " + eq.toString()); getDAO().performCallback(0, eq.getMaxResult(), this, eq); logger.debug("3333====After running " + eq.toString() + ", there are " + queriesToRunNextDegree.size() + " queries to run in the next degree."); } } // end while loop // very important!! // unscannedNodeList.clear(); // //////////////////////////////////////////////// logger.debug("4444==== At the end of degree " + currentDegree + ", there are " + nodeList.size() + " nodes and " + edgeList.size() + " edges"); logger.debug( "5555====There are " + queriesToRunNextDegree.size() + " queries to run in the next degree."); queriesToRun.addAll(queriesToRunNextDegree); queriesToRunNextDegree.clear(); } // All hops have been done // Check to see if we have too many nodes. if (nodeList.size() > graphQuery.getMaxNodes()) { nodeList = nodesFromPreviousDegree; edgeList = edgesFromPreviousDegree; intStatus = 1; // will trigger the message. strStatus = "Returning only " + currentDegree + " hops, as maximum nodes you requested would be exceeded"; } else { intStatus = 1; // will trigger the message. strStatus = "Returning " + nodeList.size() + " nodes and " + edgeList.size() + " edges."; } // NOW finally add in all those unique edges. performPostProcess(graphQuery); final V_GenericGraph g = new V_GenericGraph(nodeList, edgeList); g.setIntStatus(intStatus); g.setStrStatus(strStatus); logger.debug("Graph status: " + g.getStrStatus()); for (final V_LegendItem li : legendItems) { g.addLegendItem(li); } return g; }
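The graph builder drains a score-ordered queue one hop at a time: size() gates both the per-degree loop and the inner drain, and queries discovered during a hop are collected in a separate container that is only merged into the main queue once the current degree finishes. A compact, purely illustrative sketch of that two-container, hop-limited expansion (the Query type and the way follow-up queries are produced are invented here):

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;

public class HopLimitedExpansionSketch {
    record Query(String id, double score) {}

    /** Expands the graph one degree at a time, stopping at maxHops degrees or maxNodes nodes. */
    static Set<String> expand(Query seed, int maxHops, int maxNodes) {
        Set<String> nodes = new HashSet<>();
        PriorityQueue<Query> queriesToRun =
                new PriorityQueue<>((a, b) -> Double.compare(b.score(), a.score())); // highest score first
        List<Query> queriesToRunNextDegree = new ArrayList<>();
        queriesToRun.add(seed);

        for (int degree = 0; degree < maxHops && nodes.size() < maxNodes; degree++) {
            while (queriesToRun.size() > 0 && nodes.size() < maxNodes) {
                Query q = queriesToRun.poll();
                // "Running" the query: record its node and generate a follow-up query for the next hop
                nodes.add(q.id());
                queriesToRunNextDegree.add(new Query(q.id() + ".child", q.score() / 2));
            }
            // Next-hop queries only become runnable once the whole current degree is drained
            queriesToRun.addAll(queriesToRunNextDegree);
            queriesToRunNextDegree.clear();
        }
        return nodes;
    }

    public static void main(String[] args) {
        System.out.println(expand(new Query("root", 1.0), 3, 10)); // root, root.child, root.child.child
    }
}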
From source file:com.linkedin.pinot.routing.builder.GeneratorBasedRoutingTableBuilder.java
@Override public List<ServerToSegmentSetMap> computeRoutingTableFromExternalView(String tableName, ExternalView externalView, List<InstanceConfig> instanceConfigList) { // The default routing table algorithm tries to balance all available segments across all servers, so that each // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed, // returned, deserialized, aggregated, etc.). ///*from ww w .j ava 2 s. c o m*/ // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail // latency. This is due to the fact that a query cannot return until it has received a response from each server, // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments // within any given routing table so that each server in the routing table has approximately the same number of // segments to process. // // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers. // With this set of servers, we check if the set of segments served by these servers is complete. If the set of // segments served does not cover all of the segments, we compute the list of missing segments and pick a random // server that serves these missing segments until we have complete coverage of all the segments. // // We then order the segments in ascending number of replicas within our server set, in order to allocate the // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to // end up on a replica with fewer segments. // // Then, we pick a random replica for each segment, iterating from fewest replicas to most replicas, inversely // weighted by the number of segments already assigned to that replica. This ensures that we build a routing table // that's as even as possible. // // The algorithm to generate a routing table is thus: // 1. Compute the inverse external view, a mapping of servers to segments // 2. For each routing table to generate: // a) Pick TARGET_SERVER_COUNT_PER_QUERY distinct servers // b) Check if the server set covers all the segments; if not, add additional servers until it does. // c) Order the segments in our server set in ascending order of number of replicas present in our server set // d) For each segment, pick a random replica with proper weighting // e) Return that routing table // // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize // according to two criteria: the variance in workload per server for any individual table as well as the variance // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables // according to a per-routing table metric and discard the worst routing tables. 
RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator(); routingTableGenerator.init(externalView, instanceConfigList); PriorityQueue<Pair<Map<String, Set<String>>, Float>> topRoutingTables = new PriorityQueue<>( ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, Set<String>>, Float>>() { @Override public int compare(Pair<Map<String, Set<String>>, Float> left, Pair<Map<String, Set<String>>, Float> right) { // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative of the comparison return -Float.compare(left.getValue(), right.getValue()); } }); for (int i = 0; i < ROUTING_TABLE_COUNT; i++) { topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator)); } // Generate routing more tables and keep the ROUTING_TABLE_COUNT top ones for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) { Pair<Map<String, Set<String>>, Float> newRoutingTable = generateRoutingTableWithMetric( routingTableGenerator); Pair<Map<String, Set<String>>, Float> worstRoutingTable = topRoutingTables.peek(); // If the new routing table is better than the worst one, keep it if (newRoutingTable.getRight() < worstRoutingTable.getRight()) { topRoutingTables.poll(); topRoutingTables.add(newRoutingTable); } } // Return the best routing tables List<ServerToSegmentSetMap> routingTables = new ArrayList<>(topRoutingTables.size()); while (!topRoutingTables.isEmpty()) { Pair<Map<String, Set<String>>, Float> routingTableWithMetric = topRoutingTables.poll(); routingTables.add(new ServerToSegmentSetMap(routingTableWithMetric.getKey())); } return routingTables; }
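Note the comparator trick: the metric is a cost where lower is better, but negating Float.compare turns the heap into a max-heap, so peek() is always the worst routing table still kept and each new candidate can be tested against it in constant time. A minimal sketch of keeping the N lowest-cost candidates this way (Candidate is an invented type, not part of Pinot):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class KeepNBestSketch {
    record Candidate(String name, float cost) {}

    /** Keeps the n candidates with the lowest cost. */
    static List<Candidate> nBest(List<Candidate> candidates, int n) {
        // Max-heap on cost: peek() is the *worst* candidate we are still keeping
        PriorityQueue<Candidate> best =
                new PriorityQueue<>(n, (a, b) -> -Float.compare(a.cost(), b.cost()));
        for (Candidate c : candidates) {
            if (best.size() < n) {
                best.add(c);
            } else if (c.cost() < best.peek().cost()) {
                best.poll();            // drop the current worst
                best.add(c);
            }
        }
        List<Candidate> result = new ArrayList<>(best.size());
        while (!best.isEmpty()) {
            result.add(best.poll());    // worst-to-best order
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(nBest(List.of(
                new Candidate("a", 3.0f), new Candidate("b", 1.0f),
                new Candidate("c", 2.5f), new Candidate("d", 0.5f)), 2));
        // keeps d (0.5) and b (1.0), printed worst-first
    }
}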
From source file:com.linkedin.pinot.broker.routing.builder.GeneratorBasedRoutingTableBuilder.java
@Override public void computeRoutingTableFromExternalView(String tableName, ExternalView externalView, List<InstanceConfig> instanceConfigs) { // The default routing table algorithm tries to balance all available segments across all servers, so that each // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed, // returned, deserialized, aggregated, etc.). ////ww w.j a v a 2 s . c o m // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail // latency. This is due to the fact that a query cannot return until it has received a response from each server, // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments // within any given routing table so that each server in the routing table has approximately the same number of // segments to process. // // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers. // With this set of servers, we check if the set of segments served by these servers is complete. If the set of // segments served does not cover all of the segments, we compute the list of missing segments and pick a random // server that serves these missing segments until we have complete coverage of all the segments. // // We then order the segments in ascending number of replicas within our server set, in order to allocate the // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to // end up on a server with fewer segments. // // Then, we pick a server with least segments already assigned for each segment. This ensures that we build a // routing table that's as even as possible. // // The algorithm to generate a routing table is thus: // 1. Compute the inverse external view, a mapping of servers to segments // 2. For each routing table to generate: // a) Pick _targetNumServersPerQuery distinct servers // b) Check if the server set covers all the segments; if not, add additional servers until it does // c) Order the segments in our server set in ascending order of number of replicas present in our server set // d) For each segment, pick a server with least segments already assigned // e) Return that routing table // // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize // according to two criteria: the variance in workload per server for any individual table as well as the variance // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables // according to a per-routing table metric and discard the worst routing tables. 
RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator(); routingTableGenerator.init(externalView, instanceConfigs); PriorityQueue<Pair<Map<String, List<String>>, Float>> topRoutingTables = new PriorityQueue<>( ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, List<String>>, Float>>() { @Override public int compare(Pair<Map<String, List<String>>, Float> left, Pair<Map<String, List<String>>, Float> right) { // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative of the comparison return -Float.compare(left.getValue(), right.getValue()); } }); for (int i = 0; i < ROUTING_TABLE_COUNT; i++) { topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator)); } // Generate routing more tables and keep the ROUTING_TABLE_COUNT top ones for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) { Pair<Map<String, List<String>>, Float> newRoutingTable = generateRoutingTableWithMetric( routingTableGenerator); Pair<Map<String, List<String>>, Float> worstRoutingTable = topRoutingTables.peek(); // If the new routing table is better than the worst one, keep it if (newRoutingTable.getRight() < worstRoutingTable.getRight()) { topRoutingTables.poll(); topRoutingTables.add(newRoutingTable); } } // Return the best routing tables List<Map<String, List<String>>> routingTables = new ArrayList<>(topRoutingTables.size()); while (!topRoutingTables.isEmpty()) { routingTables.add(topRoutingTables.poll().getKey()); } setRoutingTables(routingTables); }
From source file:com.joliciel.talismane.parser.TransitionBasedParserImpl.java
@Override public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences) { MONITOR.startTask("parseSentence"); try {/*from w w w . ja v a 2s . co m*/ long startTime = (new Date()).getTime(); int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000; int minFreeMemoryBytes = minFreeMemory * KILOBYTE; TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence(); TreeMap<Integer, PriorityQueue<ParseConfiguration>> heaps = new TreeMap<Integer, PriorityQueue<ParseConfiguration>>(); PriorityQueue<ParseConfiguration> heap0 = new PriorityQueue<ParseConfiguration>(); for (PosTagSequence posTagSequence : posTagSequences) { // add an initial ParseConfiguration for each postag sequence ParseConfiguration initialConfiguration = this.getParserServiceInternal() .getInitialConfiguration(posTagSequence); initialConfiguration.setScoringStrategy(decisionMaker.getDefaultScoringStrategy()); heap0.add(initialConfiguration); if (LOG.isDebugEnabled()) { LOG.debug("Adding initial posTagSequence: " + posTagSequence); } } heaps.put(0, heap0); PriorityQueue<ParseConfiguration> backupHeap = null; PriorityQueue<ParseConfiguration> finalHeap = null; PriorityQueue<ParseConfiguration> terminalHeap = new PriorityQueue<ParseConfiguration>(); while (heaps.size() > 0) { Entry<Integer, PriorityQueue<ParseConfiguration>> heapEntry = heaps.pollFirstEntry(); PriorityQueue<ParseConfiguration> currentHeap = heapEntry.getValue(); int currentHeapIndex = heapEntry.getKey(); if (LOG.isTraceEnabled()) { LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size()); } boolean finished = false; // systematically set the final heap here, just in case we exit "naturally" with no more heaps finalHeap = heapEntry.getValue(); backupHeap = new PriorityQueue<ParseConfiguration>(); // we jump out when either (a) all tokens have been attached or (b) we go over the max alloted time ParseConfiguration topConf = currentHeap.peek(); if (topConf.isTerminal()) { LOG.trace("Exiting with terminal heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size()); finished = true; } if (earlyStop && terminalHeap.size() >= beamWidth) { LOG.debug( "Early stop activated and terminal heap contains " + beamWidth + " entries. Exiting."); finalHeap = terminalHeap; finished = true; } long analysisTime = (new Date()).getTime() - startTime; if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) { LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText()); LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds."); finished = true; } if (minFreeMemory > 0) { long freeMemory = Runtime.getRuntime().freeMemory(); if (freeMemory < minFreeMemoryBytes) { LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText()); LOG.info("Min free memory (bytes):" + minFreeMemoryBytes); LOG.info("Current free memory (bytes): " + freeMemory); finished = true; } } if (finished) { break; } // limit the breadth to K int maxSequences = currentHeap.size() > this.beamWidth ? 
this.beamWidth : currentHeap.size(); int j = 0; while (currentHeap.size() > 0) { ParseConfiguration history = currentHeap.poll(); if (LOG.isTraceEnabled()) { LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":"); LOG.trace(history.toString()); LOG.trace("Score: " + df.format(history.getScore())); LOG.trace(history.getPosTagSequence()); } List<Decision<Transition>> decisions = new ArrayList<Decision<Transition>>(); // test the positive rules on the current configuration boolean ruleApplied = false; if (parserPositiveRules != null) { MONITOR.startTask("check rules"); try { for (ParserRule rule : parserPositiveRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking rule: " + rule.toString()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env); if (ruleResult != null && ruleResult.getOutcome()) { Decision<Transition> positiveRuleDecision = TalismaneSession .getTransitionSystem().createDefaultDecision(rule.getTransition()); decisions.add(positiveRuleDecision); positiveRuleDecision.addAuthority(rule.getCondition().getName()); ruleApplied = true; if (LOG.isTraceEnabled()) { LOG.trace("Rule applies. Setting transition to: " + rule.getTransition().getCode()); } break; } } } finally { MONITOR.endTask("check rules"); } } if (!ruleApplied) { // test the features on the current configuration List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>(); MONITOR.startTask("feature analyse"); try { for (ParseConfigurationFeature<?> feature : this.parseFeatures) { MONITOR.startTask(feature.getName()); try { RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<?> featureResult = feature.check(history, env); if (featureResult != null) parseFeatureResults.add(featureResult); } finally { MONITOR.endTask(feature.getName()); } } if (LOG_FEATURES.isTraceEnabled()) { for (FeatureResult<?> featureResult : parseFeatureResults) { LOG_FEATURES.trace(featureResult.toString()); } } } finally { MONITOR.endTask("feature analyse"); } // evaluate the feature results using the decision maker MONITOR.startTask("make decision"); try { decisions = this.decisionMaker.decide(parseFeatureResults); for (ClassificationObserver<Transition> observer : this.observers) { observer.onAnalyse(history, parseFeatureResults, decisions); } List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>( decisions.size()); for (Decision<Transition> decision : decisions) { if (decision.getProbability() > MIN_PROB_TO_STORE) decisionShortList.add(decision); } decisions = decisionShortList; } finally { MONITOR.endTask("make decision"); } // apply the negative rules Set<Transition> eliminatedTransitions = new HashSet<Transition>(); if (parserNegativeRules != null) { MONITOR.startTask("check negative rules"); try { for (ParserRule rule : parserNegativeRules) { if (LOG.isTraceEnabled()) { LOG.trace("Checking negative rule: " + rule.toString()); } RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env); if (ruleResult != null && ruleResult.getOutcome()) { eliminatedTransitions.addAll(rule.getTransitions()); if (LOG.isTraceEnabled()) { for (Transition eliminatedTransition : rule.getTransitions()) LOG.trace("Rule applies. 
Eliminating transition: " + eliminatedTransition.getCode()); } } } if (eliminatedTransitions.size() > 0) { List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>(); for (Decision<Transition> decision : decisions) { if (!eliminatedTransitions.contains(decision.getOutcome())) { decisionShortList.add(decision); } else { LOG.trace("Eliminating decision: " + decision.toString()); } } if (decisionShortList.size() > 0) { decisions = decisionShortList; } else { LOG.debug("All decisions eliminated! Restoring original decisions."); } } } finally { MONITOR.endTask("check negative rules"); } } } // has a positive rule been applied? boolean transitionApplied = false; // add new configuration to the heap, one for each valid transition MONITOR.startTask("heap sort"); try { // Why apply all decisions here? Why not just the top N (where N = beamwidth)? // Answer: because we're not always adding solutions to the same heap // And yet: a decision here can only do one of two things: process a token (heap+1000), or add a non-processing transition (heap+1) // So, if we've already applied N decisions of each type, we should be able to stop for (Decision<Transition> decision : decisions) { Transition transition = decision.getOutcome(); if (LOG.isTraceEnabled()) LOG.trace("Outcome: " + transition.getCode() + ", " + decision.getProbability()); if (transition.checkPreconditions(history)) { transitionApplied = true; ParseConfiguration configuration = this.parserServiceInternal .getConfiguration(history); if (decision.isStatistical()) configuration.addDecision(decision); transition.apply(configuration); int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration) * 1000; if (configuration.isTerminal()) { nextHeapIndex = Integer.MAX_VALUE; } else { while (nextHeapIndex <= currentHeapIndex) nextHeapIndex++; } PriorityQueue<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex); if (nextHeap == null) { if (configuration.isTerminal()) nextHeap = terminalHeap; else nextHeap = new PriorityQueue<ParseConfiguration>(); heaps.put(nextHeapIndex, nextHeap); if (LOG.isTraceEnabled()) LOG.trace("Created heap with index: " + nextHeapIndex); } nextHeap.add(configuration); if (LOG.isTraceEnabled()) { LOG.trace("Added configuration with score " + configuration.getScore() + " to heap: " + nextHeapIndex + ", total size: " + nextHeap.size()); } configuration.clearMemory(); } else { if (LOG.isTraceEnabled()) LOG.trace("Cannot apply transition: doesn't meet pre-conditions"); // just in case the we run out of both heaps and analyses, we build this backup heap backupHeap.add(history); } // does transition meet pre-conditions? 
} // next transition } finally { MONITOR.endTask("heap sort"); } if (transitionApplied) { j++; } else { LOG.trace("No transitions could be applied: not counting this history as part of the beam"); } // beam width test if (j == maxSequences) break; } // next history } // next atomic index // return the best sequences on the heap List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>(); int i = 0; if (finalHeap.isEmpty()) finalHeap = backupHeap; while (!finalHeap.isEmpty()) { bestConfigurations.add(finalHeap.poll()); i++; if (i >= this.getBeamWidth()) break; } if (LOG.isDebugEnabled()) { for (ParseConfiguration finalConfiguration : bestConfigurations) { LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString()); LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence()); LOG.debug("Transitions: " + finalConfiguration.getTransitions()); LOG.debug("Decisions: " + finalConfiguration.getDecisions()); if (LOG.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); for (Decision<Transition> decision : finalConfiguration.getDecisions()) { sb.append(" * "); sb.append(df.format(decision.getProbability())); } sb.append(" root "); sb.append(finalConfiguration.getTransitions().size()); LOG.trace(sb.toString()); sb = new StringBuilder(); sb.append(" * PosTag sequence score "); sb.append(df.format(finalConfiguration.getPosTagSequence().getScore())); sb.append(" = "); for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) { sb.append(" * "); sb.append(df.format(posTaggedToken.getDecision().getProbability())); } sb.append(" root "); sb.append(finalConfiguration.getPosTagSequence().size()); LOG.trace(sb.toString()); sb = new StringBuilder(); sb.append(" * Token sequence score = "); sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore())); LOG.trace(sb.toString()); } } } return bestConfigurations; } finally { MONITOR.endTask("parseSentence"); } }
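Two further size()-driven idioms appear in this parser: the beam width is enforced with Math.min(currentHeap.size(), beamWidth) before polling, and the final answer is read off the winning heap by polling until it is empty or beamWidth configurations have been taken. A small generic sketch of one beam step and the final extraction, with an invented Hypothesis type standing in for ParseConfiguration:

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class BeamStepSketch {
    record Hypothesis(String text, double score) {}

    /** Expands at most beamWidth hypotheses from the current heap into the next heap. */
    static PriorityQueue<Hypothesis> beamStep(PriorityQueue<Hypothesis> currentHeap, int beamWidth) {
        PriorityQueue<Hypothesis> nextHeap =
                new PriorityQueue<>((a, b) -> Double.compare(b.score(), a.score())); // best first

        // Breadth limit: never expand more than beamWidth hypotheses, even if the heap holds more
        int maxSequences = Math.min(currentHeap.size(), beamWidth);
        for (int j = 0; j < maxSequences; j++) {
            Hypothesis history = currentHeap.poll();
            // "Expand" the hypothesis: here, two toy successors with decayed scores
            nextHeap.add(new Hypothesis(history.text() + "-a", history.score() * 0.9));
            nextHeap.add(new Hypothesis(history.text() + "-b", history.score() * 0.5));
        }
        return nextHeap;
    }

    /** Pulls the best results off the final heap, stopping at beamWidth. */
    static List<Hypothesis> best(PriorityQueue<Hypothesis> finalHeap, int beamWidth) {
        List<Hypothesis> best = new ArrayList<>();
        while (!finalHeap.isEmpty() && best.size() < beamWidth) {
            best.add(finalHeap.poll());
        }
        return best;
    }

    public static void main(String[] args) {
        PriorityQueue<Hypothesis> heap =
                new PriorityQueue<>((a, b) -> Double.compare(b.score(), a.score()));
        heap.add(new Hypothesis("root", 1.0));
        heap = beamStep(heap, 2);
        heap = beamStep(heap, 2);
        System.out.println(best(heap, 2)); // the two highest-scoring expansions
    }
}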
From source file:com.joliciel.talismane.tokeniser.patterns.CompoundPatternTokeniser.java
@Override public List<TokenisedAtomicTokenSequence> tokeniseWithDecisions(Sentence sentence) { MONITOR.startTask("tokeniseWithDecisions"); try {//from w w w . java 2 s . c o m // apply any pre-tokenisation decisions via filters // we only want one placeholder per start index - the first one that gets added Map<Integer, TokenPlaceholder> placeholderMap = new HashMap<Integer, TokenPlaceholder>(); for (TokenFilter tokenFilter : this.tokenFilters) { Set<TokenPlaceholder> myPlaceholders = tokenFilter.apply(sentence.getText()); for (TokenPlaceholder placeholder : myPlaceholders) { if (!placeholderMap.containsKey(placeholder.getStartIndex())) { placeholderMap.put(placeholder.getStartIndex(), placeholder); } } if (LOG.isTraceEnabled()) { if (myPlaceholders.size() > 0) { LOG.trace("TokenFilter: " + tokenFilter); LOG.trace("placeholders: " + myPlaceholders); } } } Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(placeholderMap.values()); // Initially, separate the sentence into tokens using the separators provided TokenSequence tokenSequence = this.tokeniserService.getTokenSequence(sentence, Tokeniser.SEPARATORS, placeholders); // apply any pre-processing filters that have been added for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) { tokenSequenceFilter.apply(tokenSequence); } // Assign each separator its default value List<TokeniserOutcome> defaultOutcomes = this.tokeniserPatternManager.getDefaultOutcomes(tokenSequence); List<Decision<TokeniserOutcome>> defaultDecisions = new ArrayList<Decision<TokeniserOutcome>>( defaultOutcomes.size()); for (TokeniserOutcome outcome : defaultOutcomes) { Decision<TokeniserOutcome> tokeniserDecision = this.tokeniserDecisionFactory .createDefaultDecision(outcome); tokeniserDecision.addAuthority("_" + this.getClass().getSimpleName()); tokeniserDecision.addAuthority("_" + "DefaultDecision"); defaultDecisions.add(tokeniserDecision); } List<TokenisedAtomicTokenSequence> sequences = null; // For each test pattern, see if anything in the sentence matches it if (this.decisionMaker != null) { List<TokenPatternMatchSequence> matchingSequences = new ArrayList<TokenPatternMatchSequence>(); Map<Token, Set<TokenPatternMatchSequence>> tokenMatchSequenceMap = new HashMap<Token, Set<TokenPatternMatchSequence>>(); Map<TokenPatternMatchSequence, TokenPatternMatch> primaryMatchMap = new HashMap<TokenPatternMatchSequence, TokenPatternMatch>(); Set<Token> matchedTokens = new HashSet<Token>(); MONITOR.startTask("pattern matching"); try { for (TokenPattern parsedPattern : this.getTokeniserPatternManager().getParsedTestPatterns()) { List<TokenPatternMatchSequence> matchesForThisPattern = parsedPattern.match(tokenSequence); for (TokenPatternMatchSequence matchSequence : matchesForThisPattern) { matchingSequences.add(matchSequence); matchedTokens.addAll(matchSequence.getTokensToCheck()); TokenPatternMatch primaryMatch = null; Token token = matchSequence.getTokensToCheck().get(0); Set<TokenPatternMatchSequence> matchSequences = tokenMatchSequenceMap.get(token); if (matchSequences == null) { matchSequences = new TreeSet<TokenPatternMatchSequence>(); tokenMatchSequenceMap.put(token, matchSequences); } matchSequences.add(matchSequence); for (TokenPatternMatch patternMatch : matchSequence.getTokenPatternMatches()) { if (patternMatch.getToken().equals(token)) { primaryMatch = patternMatch; break; } } if (LOG.isTraceEnabled()) { LOG.trace("Found match: " + primaryMatch); } primaryMatchMap.put(matchSequence, primaryMatch); } } } finally { 
MONITOR.endTask("pattern matching"); } // we want to create the n most likely token sequences // the sequence has to correspond to a token pattern Map<TokenPatternMatchSequence, List<Decision<TokeniserOutcome>>> matchSequenceDecisionMap = new HashMap<TokenPatternMatchSequence, List<Decision<TokeniserOutcome>>>(); for (TokenPatternMatchSequence matchSequence : matchingSequences) { TokenPatternMatch match = primaryMatchMap.get(matchSequence); LOG.debug("next pattern match: " + match.toString()); List<FeatureResult<?>> tokenFeatureResults = new ArrayList<FeatureResult<?>>(); MONITOR.startTask("analyse features"); try { for (TokenPatternMatchFeature<?> feature : features) { RuntimeEnvironment env = this.featureService.getRuntimeEnvironment(); FeatureResult<?> featureResult = feature.check(match, env); if (featureResult != null) { tokenFeatureResults.add(featureResult); } } if (LOG.isTraceEnabled()) { for (FeatureResult<?> featureResult : tokenFeatureResults) { LOG.trace(featureResult.toString()); } } } finally { MONITOR.endTask("analyse features"); } List<Decision<TokeniserOutcome>> decisions = null; MONITOR.startTask("make decision"); try { decisions = this.decisionMaker.decide(tokenFeatureResults); for (ClassificationObserver<TokeniserOutcome> observer : this.observers) observer.onAnalyse(match.getToken(), tokenFeatureResults, decisions); for (Decision<TokeniserOutcome> decision : decisions) { decision.addAuthority("_" + this.getClass().getSimpleName()); decision.addAuthority("_" + "Patterns"); decision.addAuthority(match.getPattern().getName()); } } finally { MONITOR.endTask("make decision"); } matchSequenceDecisionMap.put(matchSequence, decisions); } // initially create a heap with a single, empty sequence PriorityQueue<TokenisedAtomicTokenSequence> heap = new PriorityQueue<TokenisedAtomicTokenSequence>(); TokenisedAtomicTokenSequence emptySequence = this.getTokeniserService() .getTokenisedAtomicTokenSequence(sentence, 0); heap.add(emptySequence); for (int i = 0; i < tokenSequence.listWithWhiteSpace().size(); i++) { Token token = tokenSequence.listWithWhiteSpace().get(i); if (LOG.isTraceEnabled()) { LOG.trace("Token : \"" + token.getText() + "\""); } // build a new heap for this iteration PriorityQueue<TokenisedAtomicTokenSequence> previousHeap = heap; heap = new PriorityQueue<TokenisedAtomicTokenSequence>(); if (i == 0) { // first token is always "separate" from the outside world Decision<TokeniserOutcome> decision = this.tokeniserDecisionFactory .createDefaultDecision(TokeniserOutcome.SEPARATE); decision.addAuthority("_" + this.getClass().getSimpleName()); decision.addAuthority("_" + "DefaultDecision"); TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token, decision); TokenisedAtomicTokenSequence newSequence = this.getTokeniserService() .getTokenisedAtomicTokenSequence(emptySequence); newSequence.add(taggedToken); heap.add(newSequence); continue; } // limit the heap breadth to K int maxSequences = previousHeap.size() > this.getBeamWidth() ? 
this.getBeamWidth() : previousHeap.size(); MONITOR.startTask("heap sort"); try { for (int j = 0; j < maxSequences; j++) { TokenisedAtomicTokenSequence history = previousHeap.poll(); // Find the separating & non-separating decisions if (history.size() > i) { // token already added as part of a sequence introduced by another token heap.add(history); } else if (tokenMatchSequenceMap.containsKey(token)) { // token begins one or more match sequences // these are ordered from shortest to longest (via TreeSet) List<TokenPatternMatchSequence> matchSequences = new ArrayList<TokenPatternMatchSequence>( tokenMatchSequenceMap.get(token)); // Since sequences P1..Pn contain each other, // there can be exactly matchSequences.size() consistent solutions // Assume the default is separate // 0: all separate // 1: join P1, separate rest // 2: join P2, separate rest // ... // n: join Pn // We need to add each of these to the heap // by taking the product of all probabilities consistent with each solution // The probabities for each solution are (j=join, s=separate) // All separate: s1 x s2 x ... x sn // P1: j1 x s2 x ... x sn // P2: j1 x j2 x ... x sn // ... // Pn: j1 x j2 x ... x jn // Any solution of the form s1 x j2 would be inconsistent, and is not considered // If Pi and Pj start and end on the exact same token, then the solution for both is // Pi: j1 x ... x ji x jj x sj+1 ... x sn // Pj: j1 x ... x ji x jj x sj+1 ... x sn // Note of course that we're never likely to have more than two Ps here, // but we need a solution for more just to be sure to be sure TokeniserOutcome defaultOutcome = defaultDecisions .get(token.getIndexWithWhiteSpace()).getOutcome(); TokeniserOutcome otherOutcome = null; if (defaultOutcome == TokeniserOutcome.SEPARATE) otherOutcome = TokeniserOutcome.JOIN; else otherOutcome = TokeniserOutcome.SEPARATE; double[] decisionProbs = new double[matchSequences.size() + 1]; for (int k = 0; k < decisionProbs.length; k++) decisionProbs[k] = 1; // Note: k0 = default decision (e.g. separate all), k1=first pattern // p1 = first pattern int p = 1; int prevEndIndex = -1; for (TokenPatternMatchSequence matchSequence : matchSequences) { int endIndex = matchSequence.getTokensToCheck() .get(matchSequence.getTokensToCheck().size() - 1).getEndIndex(); List<Decision<TokeniserOutcome>> decisions = matchSequenceDecisionMap .get(matchSequence); for (Decision<TokeniserOutcome> decision : decisions) { for (int k = 0; k < decisionProbs.length; k++) { if (decision.getOutcome() == defaultOutcome) { // e.g. separate in most cases if (k < p && endIndex > prevEndIndex) decisionProbs[k] *= decision.getProbability(); else if (k + 1 < p && endIndex <= prevEndIndex) decisionProbs[k] *= decision.getProbability(); } else { // e.g. 
join in most cases if (k >= p && endIndex > prevEndIndex) decisionProbs[k] *= decision.getProbability(); else if (k + 1 >= p && endIndex <= prevEndIndex) decisionProbs[k] *= decision.getProbability(); } } // next k } // next decision (only 2 of these) prevEndIndex = endIndex; p++; } // transform to probability distribution double sumProbs = 0; for (int k = 0; k < decisionProbs.length; k++) sumProbs += decisionProbs[k]; if (sumProbs > 0) for (int k = 0; k < decisionProbs.length; k++) decisionProbs[k] /= sumProbs; // Apply default decision // Since this is the default decision for all tokens in the sequence, we don't add the other tokens for now, // so as to allow them // to get examined one at a time, just in case one of them starts its own separate sequence Decision<TokeniserOutcome> defaultDecision = this.tokeniserDecisionFactory .createDecision(defaultOutcome.getCode(), decisionProbs[0]); defaultDecision.addAuthority("_" + this.getClass().getSimpleName()); defaultDecision.addAuthority("_" + "Patterns"); for (TokenPatternMatchSequence matchSequence : matchSequences) { defaultDecision.addAuthority(matchSequence.getTokenPattern().getName()); } TaggedToken<TokeniserOutcome> defaultTaggedToken = this.tokeniserService .getTaggedToken(token, defaultDecision); TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService() .getTokenisedAtomicTokenSequence(history); defaultSequence.add(defaultTaggedToken); defaultSequence.addDecision(defaultDecision); heap.add(defaultSequence); // Apply one non-default decision per match sequence for (int k = 0; k < matchSequences.size(); k++) { TokenPatternMatchSequence matchSequence = matchSequences.get(k); double prob = decisionProbs[k + 1]; Decision<TokeniserOutcome> decision = this.tokeniserDecisionFactory .createDecision(otherOutcome.getCode(), prob); decision.addAuthority("_" + this.getClass().getSimpleName()); decision.addAuthority("_" + "Patterns"); decision.addAuthority(matchSequence.getTokenPattern().getName()); TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService .getTaggedToken(token, decision); TokenisedAtomicTokenSequence newSequence = this.getTokeniserService() .getTokenisedAtomicTokenSequence(history); newSequence.add(taggedToken); newSequence.addDecision(decision); // The decision is NOT the default decision for all tokens in the sequence, add all other tokens // in this sequence to the solution for (Token tokenInSequence : matchSequence.getTokensToCheck()) { if (tokenInSequence.equals(token)) { continue; } Decision<TokeniserOutcome> decisionInSequence = this.tokeniserDecisionFactory .createDefaultDecision(decision.getOutcome()); decisionInSequence.addAuthority("_" + this.getClass().getSimpleName()); decisionInSequence.addAuthority("_" + "DecisionInSequence"); decisionInSequence.addAuthority("_" + "DecisionInSequence_non_default"); decisionInSequence.addAuthority("_" + "Patterns"); TaggedToken<TokeniserOutcome> taggedTokenInSequence = this.tokeniserService .getTaggedToken(tokenInSequence, decisionInSequence); newSequence.add(taggedTokenInSequence); } heap.add(newSequence); } // next sequence } else { // token doesn't start match sequence, and hasn't already been added to the current sequence Decision<TokeniserOutcome> decision = defaultDecisions.get(i); if (matchedTokens.contains(token)) { decision = this.tokeniserDecisionFactory .createDefaultDecision(decision.getOutcome()); decision.addAuthority("_" + this.getClass().getSimpleName()); decision.addAuthority("_" + "DecisionInSequence"); decision.addAuthority("_" + 
"DecisionInSequence_default"); decision.addAuthority("_" + "Patterns"); } TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService .getTaggedToken(token, decision); TokenisedAtomicTokenSequence newSequence = this.getTokeniserService() .getTokenisedAtomicTokenSequence(history); newSequence.add(taggedToken); heap.add(newSequence); } } // next sequence in the old heap } finally { MONITOR.endTask("heap sort"); } } // next token sequences = new ArrayList<TokenisedAtomicTokenSequence>(); int k = 0; while (!heap.isEmpty()) { sequences.add(heap.poll()); k++; if (k >= this.getBeamWidth()) break; } } else { sequences = new ArrayList<TokenisedAtomicTokenSequence>(); TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService() .getTokenisedAtomicTokenSequence(sentence, 0); int i = 0; for (Token token : tokenSequence.listWithWhiteSpace()) { TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token, defaultDecisions.get(i++)); defaultSequence.add(taggedToken); } sequences.add(defaultSequence); } // have decision maker? LOG.debug("####Final token sequences:"); int j = 1; for (TokenisedAtomicTokenSequence sequence : sequences) { TokenSequence newTokenSequence = sequence.inferTokenSequence(); if (LOG.isDebugEnabled()) { LOG.debug("Token sequence " + (j++) + ", score=" + df.format(sequence.getScore())); LOG.debug("Atomic sequence: " + sequence); LOG.debug("Resulting sequence: " + newTokenSequence); } // need to re-apply the pre-processing filters, because the tokens are all new // Question: why can't we conserve the initial tokens when they haven't changed at all? // Answer: because the tokenSequence and index in the sequence is referenced by the token. // Question: should we create a separate class, Token and TokenInSequence, // one with index & sequence access & one without? for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) { tokenSequenceFilter.apply(newTokenSequence); } if (LOG.isDebugEnabled()) { LOG.debug("After filters: " + newTokenSequence); } } return sequences; } finally { MONITOR.endTask("tokeniseWithDecisions"); } }
From source file:mondrian.olap.fun.FunUtil.java
/**
 * Julian's algorithm for stable partial sort. Improves Pedro's algorithm
 * by using a heap (priority queue) for the top {@code limit} items seen.
 * The items on the priority queue have an ordinal field, so the queue
 * can be used to generate a list of stably sorted items. (Heap sort is
 * not normally stable.)
 *
 * @param list List to sort
 * @param comp Comparator
 * @param limit Maximum number of items to return
 * @param <T> Element type
 * @return Sorted list, containing at most limit items
 */
public static <T> List<T> stablePartialSortJulian(final List<T> list, final Comparator<T> comp, int limit) {
    final Comparator<ObjIntPair<T>> comp2 = new Comparator<ObjIntPair<T>>() {
        public int compare(ObjIntPair<T> o1, ObjIntPair<T> o2) {
            int c = comp.compare(o1.t, o2.t);
            if (c == 0) {
                c = Util.compare(o1.i, o2.i);
            }
            return -c;
        }
    };
    int filled = 0;
    final PriorityQueue<ObjIntPair<T>> queue = new PriorityQueue<ObjIntPair<T>>(limit, comp2);
    for (T element : list) {
        if (filled < limit) {
            queue.offer(new ObjIntPair<T>(element, filled++));
        } else {
            ObjIntPair<T> head = queue.element();
            if (comp.compare(element, head.t) <= 0) {
                ObjIntPair<T> item = new ObjIntPair<T>(element, filled++);
                if (comp2.compare(item, head) >= 0) {
                    ObjIntPair poll = queue.remove();
                    Util.discard(poll);
                    queue.offer(item);
                }
            }
        }
    }
    int n = queue.size();
    final Object[] elements = new Object[n];
    while (n > 0) {
        elements[--n] = queue.poll().t;
    }
    assert queue.isEmpty();
    //noinspection unchecked
    return Arrays.asList((T[]) elements);
}
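What makes this partial sort stable is pairing each element with its arrival ordinal and ordering the heap by (value, ordinal) reversed, so the head is always the worst candidate kept so far and ties fall back to insertion order. A self-contained sketch of the same idea without the Mondrian-internal ObjIntPair and Util helpers (class and method names are invented):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class StablePartialSortSketch {
    record Ranked<T>(T value, int ordinal) {}

    /** Returns the first {@code limit} elements of {@code list} as if stably sorted by {@code comp}. */
    static <T> List<T> stableTopK(List<T> list, Comparator<T> comp, int limit) {
        // Reverse (value, ordinal) order: the head of the heap is the worst element kept so far
        Comparator<Ranked<T>> heapOrder = Comparator
                .comparing((Ranked<T> r) -> r.value(), comp)
                .thenComparingInt(r -> r.ordinal())
                .reversed();
        PriorityQueue<Ranked<T>> queue = new PriorityQueue<>(limit, heapOrder);

        int ordinal = 0;
        for (T element : list) {
            Ranked<T> item = new Ranked<>(element, ordinal++);
            if (queue.size() < limit) {
                queue.offer(item);
            } else if (heapOrder.compare(item, queue.peek()) > 0) {
                queue.poll();            // item beats the current worst; swap it in
                queue.offer(item);
            }
        }

        // Drain worst-first, prepending each element, to get a best-first, stable result
        List<T> result = new ArrayList<>(queue.size());
        while (!queue.isEmpty()) {
            result.add(0, queue.poll().value());
        }
        return result;
    }

    public static void main(String[] args) {
        List<String> words = List.of("pear", "fig", "plum", "kiwi", "date");
        // Three shortest words, ties kept in original order: [fig, pear, plum]
        System.out.println(stableTopK(words, Comparator.comparingInt(String::length), 3));
    }
}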
From source file:org.apache.hadoop.hbase.extended.loadbalance.strategies.hotspot.HotSpotLoadBalancer.java
@Override public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) { initParameters();// ww w.j av a 2s .c o m /** * <pre> * We atleast need two priority queues * a) It would contain HotSpot regions with their load as the moving criteria (max priority queue) * b) Non hot spot region with their loads (min priority queue) * * Further we need to iterate over these queues and decrease the load so we * need a data structure to build these queues * and lastly we need to return the Region plan. * </pre> */ LOG.debug("#################Came in the new Balancer Code and the cluster status is = " + this.status); long startTime = System.currentTimeMillis(); int numServers = clusterState.size(); if (numServers == 0) { LOG.info("numServers=0 so skipping load balancing"); return null; } NavigableMap<HotSpotServerAndLoad, List<HotSpotRegionLoad>> regionServerAndServerLoadMap = new TreeMap<HotSpotServerAndLoad, List<HotSpotRegionLoad>>(); PriorityQueue<HotSpotServerAndLoad> hotspotRegionServers = new PriorityQueue<HotSpotServerAndLoad>( numServers, HotSpotServerAndLoad.DESC_LOAD); PriorityQueue<HotSpotServerAndLoad> nonHotspotRegionServers = new PriorityQueue<HotSpotServerAndLoad>( numServers, HotSpotServerAndLoad.ASC_LOAD); HashBiMap<HRegionInfo, HotSpotRegionLoad> allRegionsLoadBiMap = HashBiMap.create(); LOG.debug("#################clusterState=" + clusterState); double normalisedTotalLoadOfAllRegions = initRegionLoadMapsBasedOnInput(clusterState, regionServerAndServerLoadMap, allRegionsLoadBiMap); LOG.debug("#################normalisedTotalLoadOfAllRegions=" + normalisedTotalLoadOfAllRegions); // Check if we even need to do any load balancing double average = normalisedTotalLoadOfAllRegions / numServers; // for // logging // HBASE-3681 check sloppiness first LOG.debug("######################## final regionServerAndServerLoadMap == " + regionServerAndServerLoadMap); if (!loadBalancingNeeded(numServers, regionServerAndServerLoadMap, normalisedTotalLoadOfAllRegions, average)) { // we do not need load balancing return null; } double minLoad = normalisedTotalLoadOfAllRegions / numServers; double maxLoad = normalisedTotalLoadOfAllRegions % numServers == 0 ? minLoad : minLoad + 1; // as we now have to balance stuff, init PQ's LOG.debug(String.format("#################minLoad =%s,maxLoad= %s", minLoad, maxLoad)); for (Map.Entry<HotSpotServerAndLoad, List<HotSpotRegionLoad>> item : regionServerAndServerLoadMap .entrySet()) { HotSpotServerAndLoad serverLoad = item.getKey(); if (serverLoad.isHotSpot()) { hotspotRegionServers.add(serverLoad); } else { if (serverLoad.getLoad() < maxLoad) { nonHotspotRegionServers.add(serverLoad); } } } // Using to check balance result. StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numRegions=").append(normalisedTotalLoadOfAllRegions) .append(", numServers=").append(numServers).append(", max=").append(maxLoad).append(", min=") .append(minLoad); LOG.debug(strBalanceParam.toString()); List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>(); while (hotspotRegionServers.size() > 0 && nonHotspotRegionServers.size() > 0) { HotSpotServerAndLoad serverToBalance = hotspotRegionServers.poll(); LOG.debug(String.format("#################serverToBalance =%s", serverToBalance.getServerName().getServerName())); // get least loaded not hotspot regions of this server List<HotSpotRegionLoad> regionList = regionServerAndServerLoadMap.get(serverToBalance); // assume it to be sorted asc. 
if (regionList.size() > 0) { HotSpotRegionLoad regionToMove = regionList.remove(0); HRegionInfo regionMoveInfo = allRegionsLoadBiMap.inverse().get(regionToMove); /* * regionMoveInfo can be null in case the load map returns us * the root and meta regions along with the movable regions But * as the clusterState which is passed to us does not contain * these regions we can have a situation where * regionServerAndServerLoadMap contains some regions which are * not present in the allRegionsLoadBiMap */ if (regionMoveInfo != null && !regionMoveInfo.isMetaRegion() && !regionMoveInfo.isRootRegion() && !regionMoveInfo.isMetaTable() && regionToMove.isRegionHotspot()) { LOG.debug(String.format( "#################Came to move the region regionMoveInfo=%s;; regionToMove=%s ", regionMoveInfo, regionToMove)); // move out. HotSpotServerAndLoad destinationServer = nonHotspotRegionServers.poll(); RegionPlan rpl = new RegionPlan(allRegionsLoadBiMap.inverse().get(regionToMove), serverToBalance.getServerName(), destinationServer.getServerName()); regionsToReturn.add(rpl); serverToBalance.modifyLoad(regionToMove.getLoad()); destinationServer.modifyLoad(-1 * regionToMove.getLoad()); // reenter them to list. if they satisfy conditions if (serverToBalance.getLoad() > minLoad) { hotspotRegionServers.offer(serverToBalance); } if (destinationServer.getLoad() < maxLoad) { nonHotspotRegionServers.offer(destinationServer); } } } } LOG.info("Total Time taken to balance = " + (System.currentTimeMillis() - startTime)); LOG.info(String.format("#################regionsToReturn=%s ", regionsToReturn)); return regionsToReturn; }
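Underneath the HBase specifics, this balancer is a two-heap rebalancing loop: overloaded servers sit in a max-heap, underloaded servers in a min-heap, and load moves from the head of one to the head of the other while both size() values stay above zero, with servers re-offered only if they remain outside the target band. A generic sketch of that loop with an invented Server type; note that servers are polled before their load is changed and only then re-offered, so the heap invariant is never violated:

import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class TwoHeapBalancerSketch {
    static final class Server {
        final String name;
        double load;
        Server(String name, double load) { this.name = name; this.load = load; }
        @Override public String toString() { return name + "=" + load; }
    }

    /** Moves fixed-size chunks of load from hot servers to cold ones until both are within [min, max]. */
    static void balance(List<Server> servers, double min, double max, double chunk) {
        PriorityQueue<Server> hot =
                new PriorityQueue<>(Comparator.comparingDouble((Server s) -> s.load).reversed()); // most loaded first
        PriorityQueue<Server> cold =
                new PriorityQueue<>(Comparator.comparingDouble((Server s) -> s.load));            // least loaded first
        for (Server s : servers) {
            if (s.load > max) {
                hot.add(s);
            } else if (s.load < min) {
                cold.add(s);
            }
        }

        // Keep moving load while there is both a donor and a recipient
        while (hot.size() > 0 && cold.size() > 0) {
            Server donor = hot.poll();
            Server recipient = cold.poll();
            donor.load -= chunk;
            recipient.load += chunk;
            // Re-offer only the servers that are still outside the target band
            if (donor.load > max) {
                hot.offer(donor);
            }
            if (recipient.load < min) {
                cold.offer(recipient);
            }
        }
    }

    public static void main(String[] args) {
        List<Server> servers = List.of(new Server("s1", 10), new Server("s2", 2), new Server("s3", 6));
        balance(servers, 5, 7, 1);
        System.out.println(servers); // loads converge toward the [5, 7] band
    }
}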