Example usage for java.util PriorityQueue size

List of usage examples for java.util PriorityQueue size

Introduction

On this page you can find example usage for java.util PriorityQueue size.

Prototype

public int size()

Document

Returns the number of elements in the priority queue.
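
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects; class and variable names are illustrative) showing what size() reports as elements are added and removed:

import java.util.PriorityQueue;

public class PriorityQueueSizeExample {
    public static void main(String[] args) {
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>();
        queue.add(42);
        queue.add(7);
        queue.add(19);

        // size() reports the number of elements currently held by the queue
        System.out.println("Elements in queue: " + queue.size()); // 3

        // poll() removes the head (the smallest element under the natural ordering)
        queue.poll();
        System.out.println("Elements after poll: " + queue.size()); // 2
    }
}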

Usage

From source file:edu.snu.leader.hidden.SpatialIndividual.java

/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
public void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<SpatialIndividual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        SpatialIndividual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        Neighbor neighbor = sortedNeighbors.poll();
        _nearestNeighbors.add(neighbor);
        neighbor.getIndividual().signalNearestNeighborStatus(this);
        //            _LOG.debug( "Nearest neighbor: id=["
        //                    + getID()
        //                    + "] neighbor=["
        //                    + neighbor.getIndividual().getID()
        //                    + "]" );
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}
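
The method above polls only the nearest _nearestNeighborCount entries, using size() to avoid polling an empty queue. A condensed, hypothetical sketch of the same idiom (names are illustrative, not taken from the project):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class NearestNeighborSketch {
    // Hypothetical helper: poll at most k entries from a distance-ordered queue.
    static <T> List<T> takeNearest(PriorityQueue<T> sorted, int k) {
        List<T> out = new ArrayList<T>();
        int count = Math.min(sorted.size(), k); // size() caps the number of polls
        for (int i = 0; i < count; i++) {
            out.add(sorted.poll()); // poll() removes the smallest remaining element
        }
        return out;
    }

    public static void main(String[] args) {
        PriorityQueue<Double> distances = new PriorityQueue<Double>();
        distances.add(3.2);
        distances.add(0.5);
        distances.add(1.7);
        System.out.println(takeNearest(distances, 2)); // [0.5, 1.7]
    }
}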

From source file:org.mskcc.cbio.portal.servlet.NetworkServlet.java

/**
 *
 * @param network
 * @param diffusion
 * @param n
 * @return
 */
private List<Node> getNodesToRemove(final Network network, final double diffusion, final int n) {
    final Map<Node, Double> mapDiffusion = getMapDiffusedTotalAlteredPercentage(network, diffusion);

    // keep track of the top nKeep
    PriorityQueue<Node> topAlteredNodes = new PriorityQueue<Node>(n, new Comparator<Node>() {
        public int compare(Node n1, Node n2) {
            int ret = mapDiffusion.get(n1).compareTo(mapDiffusion.get(n2));
            if (diffusion != 0 && ret == 0) { // if the same diffused perc, use own perc
                ret = Double.compare(getTotalAlteredPercentage(n1), getTotalAlteredPercentage(n2));
            }

            if (ret == 0) { // if the same, rank according to degree
                ret = network.getDegree(n1) - network.getDegree(n2);
            }

            return ret;
        }
    });

    List<Node> nodesToRemove = new ArrayList<Node>();
    for (Node node : network.getNodes()) {
        if (isInQuery(node) || node.getType().equals(NodeType.DRUG)) {
            continue;
        }

        if (topAlteredNodes.size() < n) {
            topAlteredNodes.add(node);
        } else {
            if (n == 0) {
                nodesToRemove.add(node);
            } else {
                if (mapDiffusion.get(node) > mapDiffusion.get(topAlteredNodes.peek())) {
                    nodesToRemove.add(topAlteredNodes.poll());
                    topAlteredNodes.add(node);
                } else {
                    nodesToRemove.add(node);
                }
            }
        }
    }

    return nodesToRemove;
}
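
Here size() gates whether the heap is still filling up (topAlteredNodes.size() < n) or whether a candidate must beat the current minimum at peek(). A self-contained, hypothetical sketch of this bounded top-n idiom (names and values are illustrative):

import java.util.PriorityQueue;

public class TopNSketch {
    // Hypothetical helper: keep the n largest values seen so far in a min-heap.
    static void offerTopN(PriorityQueue<Integer> topN, int n, int candidate) {
        if (topN.size() < n) {
            topN.add(candidate); // heap not full yet
        } else if (candidate > topN.peek()) {
            topN.poll(); // evict the current minimum
            topN.add(candidate);
        }
    }

    public static void main(String[] args) {
        PriorityQueue<Integer> topN = new PriorityQueue<Integer>();
        for (int value : new int[] { 5, 1, 9, 3, 7, 8 }) {
            offerTopN(topN, 3, value);
        }
        System.out.println(topN); // holds the three largest values (7, 8, 9) in heap order
    }
}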

From source file:org.kuali.rice.krms.framework.engine.TermResolutionEngineImpl.java

/**
 *
 * @param termName
 * @return List<{@link TermResolverKey}>
 */
protected List<TermResolverKey> buildTermResolutionPlan(String termName) {
    // our result
    List<TermResolverKey> resolutionPlan = null;

    // Holds the resolvers we've visited, along with the needed metadata for generating our final plan
    Map<TermResolverKey, Visited> visitedByKey = new HashMap<TermResolverKey, Visited>();

    // this holds a least cost first list of nodes remaining to be explored
    PriorityQueue<ToVisit> toVisits = new PriorityQueue<ToVisit>(); // nice grammar there cowboy

    // dummy resolver to be the root of this tree
    // Do I really need this?  Yes, because there may be more than one resolver that resolves to the desired termName,
    // so this destination unifies the trees of those candidate resolvers
    TermResolver destination = createDestination(termName); // problem is we can't get this one out of the registry
    TermResolverKey destinationKey = new TermResolverKey(destination);

    LOG.debug("Beginning resolution tree search for " + termName);

    // seed our queue of resolvers to visit
    // need to be aware of null parent for root ToVisit
    toVisits.add(new ToVisit(0, destination, null));

    // there may not be a viable plan
    boolean plannedToDestination = false;

    // We'll do a modified Dijkstra's shortest path algorithm, where at each leaf we see if we've planned out
    // termName resolution all the way up to the root, our destination.  If so, we just reconstruct our plan.
    while (!plannedToDestination && toVisits.size() > 0) {
        // visit least cost node remaining
        ToVisit visiting = toVisits.poll();

        LOG.debug("visiting " + visiting.getTermResolverKey());

        // the resolver is the edge in our tree -- we don't get it directly from the termResolversByKey Map, because it could be our destination
        TermResolver resolver = getResolver(visiting.getTermResolverKey(), destination, destinationKey);
        TermResolver parent = getResolver(visiting.getParentKey(), destination, destinationKey);

        if (visitedByKey.containsKey(visiting.getTermResolverKey())) {
            continue; // We've already visited this one
        }

        Visited parentVisited = visitedByKey.get(visiting.getParentKey());

        if (resolver == null)
            throw new RuntimeException("Unable to get TermResolver by its key");
        Set<String> prereqs = resolver.getPrerequisites();
        // keep track of any prereqs that we already have handy
        List<String> metPrereqs = new LinkedList<String>();

        // see what prereqs we have already, and which we'll need to visit
        if (prereqs != null)
            for (String prereq : prereqs) {
                if (!termCache.containsKey(new Term(prereq, null))) {
                    // enqueue all resolvers in toVisits
                    List<TermResolver<?>> prereqResolvers = termResolversByOutput.get(prereq);
                    if (prereqResolvers != null)
                        for (TermResolver prereqResolver : prereqResolvers) {
                            // Only TermResolvers that don't take parameterized terms can be chained, so:
                            // if the TermResolver doesn't take parameters, or it resolves the output termName
                            if (CollectionUtils.isEmpty(prereqResolver.getParameterNames())
                                    || termName.equals(prereqResolver.getOutput())) {
                                // queue it up for visiting
                                toVisits.add(new ToVisit(visiting.getCost() /* cost to get to this resolver */,
                                        prereqResolver, resolver));
                            }
                        }
                } else {
                    metPrereqs.add(prereq);
                }
            }

        // Build visited info
        Visited visited = buildVisited(resolver, parentVisited, metPrereqs);
        visitedByKey.put(visited.getResolverKey(), visited);

        plannedToDestination = isPlannedBackToDestination(visited, destinationKey, visitedByKey);
    }

    if (plannedToDestination) {
        // build result from Visited tree.
        resolutionPlan = new LinkedList<TermResolverKey>();

        assembleLinearResolutionPlan(visitedByKey.get(destinationKey), visitedByKey, resolutionPlan);
    }
    return resolutionPlan;
}
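
The main loop runs while toVisits.size() > 0, i.e. until the least-cost frontier is exhausted or a complete plan is found. A stripped-down, hypothetical sketch of such a least-cost-first frontier loop (the ToVisit class here is illustrative, not the project's):

import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;

public class FrontierSketch {
    // Hypothetical node with a cost; the cheapest node is polled first.
    static class ToVisit implements Comparable<ToVisit> {
        final String key;
        final int cost;

        ToVisit(String key, int cost) {
            this.key = key;
            this.cost = cost;
        }

        public int compareTo(ToVisit other) {
            return Integer.compare(cost, other.cost);
        }
    }

    public static void main(String[] args) {
        PriorityQueue<ToVisit> toVisits = new PriorityQueue<ToVisit>();
        Set<String> visited = new HashSet<String>();
        toVisits.add(new ToVisit("destination", 0));
        toVisits.add(new ToVisit("prereqA", 2));
        toVisits.add(new ToVisit("prereqB", 1));

        // Visit least-cost nodes first until the frontier is empty.
        while (toVisits.size() > 0) {
            ToVisit visiting = toVisits.poll();
            if (!visited.add(visiting.key)) {
                continue; // already visited this one
            }
            System.out.println("visiting " + visiting.key + " at cost " + visiting.cost);
            // A real planner would enqueue the node's unexplored prerequisites here.
        }
    }
}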

From source file:$.HyperGraphBuilder$.java

@Override
    public V_GenericGraph makeGraphResponse(final V_GraphQuery graphQuery) throws Exception {
        nodeList = new HashMap<String, V_GenericNode>();
        // edgeMap = new HashMap<String, V_GenericEdge>();
        edgeList = new HashMap<String, V_GenericEdge>();
        scannedQueries = new HashSet<String>();

        final PriorityQueue<G_EntityQuery> queriesToRun = new PriorityQueue<G_EntityQuery>(10,
                new ScoreComparator());
        Map<String, V_GenericNode> nodesFromPreviousDegree = new HashMap<String, V_GenericNode>();
        Map<String, V_GenericEdge> edgesFromPreviousDegree = new HashMap<String, V_GenericEdge>();

        if (graphQuery.getMaxHops() <= 0) {
            return new V_GenericGraph();
        } else {
            logger.debug("Attempting a graph for query " + graphQuery.toString());
        }

        int intStatus = 0;
        String strStatus = "Graph Loaded";

        final G_PropertyMatchDescriptor identifierList = G_PropertyMatchDescriptor.newBuilder().setKey("_all")
                .setListRange(new ListRangeHelper(G_PropertyType.STRING, graphQuery.getSearchIds()))
                .setConstraint(G_Constraint.EQUALS).build();
        final QueryHelper qh = new QueryHelper(identifierList);
        qh.setTargetSchema(index);
        queriesToRun.add(qh);

        int currentDegree = 0;
        for (currentDegree = 0; (currentDegree < graphQuery.getMaxHops())
                && (nodeList.size() < graphQuery.getMaxNodes()); currentDegree++) {
            G_EntityQuery eq = null;
            logger.debug("${symbol_dollar}${symbol_dollar}${symbol_dollar}${symbol_dollar}There are "
                    + queriesToRun.size() + " queries to run in the current degree.");
            while ((queriesToRun.size() > 0) && ((eq = queriesToRun.poll()) != null)
                    && (nodeList.size() < graphQuery.getMaxNodes())) {

                if (ValidationUtils.isValid(eq.getPropertyMatchDescriptors())) {
                    nodesFromPreviousDegree = new HashMap<String, V_GenericNode>(nodeList);
                    edgesFromPreviousDegree = new HashMap<String, V_GenericEdge>(edgeList);
                    logger.debug("Processing degree " + currentDegree);

                    /**
                     * This will end up building nodes and edges, and creating
                     * new queries for the queue
                     */
                    logger.debug("1111=====Running query " + eq.toString());
                    getDAO().performCallback(0, eq.getMaxResult(), this, eq);
                    logger.debug("3333====After running " + eq.toString() + ", there are "
                            + queriesToRunNextDegree.size() + " queries to run in the next degree.");
                }
            } // end while loop

            // very important!!
            // unscannedNodeList.clear();
            // ////////////////////////////////////////////////
            logger.debug("4444==== At the end of degree " + currentDegree + ", there are " + nodeList.size()
                    + " nodes and " + edgeList.size() + " edges");

            logger.debug(
                    "5555====There are " + queriesToRunNextDegree.size() + " queries to run in the next degree.");
            queriesToRun.addAll(queriesToRunNextDegree);
            queriesToRunNextDegree.clear();
        }

        // All hops have been done
        // Check to see if we have too many nodes.
        if (nodeList.size() > graphQuery.getMaxNodes()) {
            nodeList = nodesFromPreviousDegree;
            edgeList = edgesFromPreviousDegree;
            intStatus = 1; // will trigger the message.
            strStatus = "Returning only " + currentDegree
                    + " hops, as maximum nodes you requested would be exceeded";
        } else {
            intStatus = 1; // will trigger the message.
            strStatus = "Returning " + nodeList.size() + " nodes and " + edgeList.size() + " edges.";
        }

        // NOW finally add in all those unique edges.

        performPostProcess(graphQuery);
        final V_GenericGraph g = new V_GenericGraph(nodeList, edgeList);
        g.setIntStatus(intStatus);
        g.setStrStatus(strStatus);
        logger.debug("Graph status: " + g.getStrStatus());
        for (final V_LegendItem li : legendItems) {
            g.addLegendItem(li);
        }

        return g;
    }
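
Each degree drains queriesToRun while nodeList.size() stays under the requested maximum, then promotes the queries gathered for the next hop. A much-reduced, hypothetical sketch of that hop-and-budget pattern (all names and values are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class HopBudgetSketch {
    public static void main(String[] args) {
        int maxHops = 2;
        int maxNodes = 5;

        PriorityQueue<String> queriesToRun = new PriorityQueue<String>();
        List<String> queriesToRunNextDegree = new ArrayList<String>();
        List<String> nodes = new ArrayList<String>();
        queriesToRun.add("seed-query");

        for (int degree = 0; degree < maxHops && nodes.size() < maxNodes; degree++) {
            System.out.println(queriesToRun.size() + " queries to run in degree " + degree);
            while (queriesToRun.size() > 0 && nodes.size() < maxNodes) {
                String query = queriesToRun.poll();
                nodes.add("node-for-" + query); // stand-in for real node and edge building
                queriesToRunNextDegree.add(query + "-next"); // queries discovered for the next hop
            }
            queriesToRun.addAll(queriesToRunNextDegree);
            queriesToRunNextDegree.clear();
        }
        System.out.println("Built " + nodes.size() + " nodes");
    }
}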

From source file:com.linkedin.pinot.routing.builder.GeneratorBasedRoutingTableBuilder.java

@Override
public List<ServerToSegmentSetMap> computeRoutingTableFromExternalView(String tableName,
        ExternalView externalView, List<InstanceConfig> instanceConfigList) {
    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a replica with fewer segments.
    //
    // Then, we pick a random replica for each segment, iterating from fewest replicas to most replicas, inversely
    // weighted by the number of segments already assigned to that replica. This ensures that we build a routing table
    // that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //   a) Pick TARGET_SERVER_COUNT_PER_QUERY distinct servers
    //   b) Check if the server set covers all the segments; if not, add additional servers until it does.
    //   c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //   d) For each segment, pick a random replica with proper weighting
    //   e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.

    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigList);

    PriorityQueue<Pair<Map<String, Set<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, Set<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, Set<String>>, Float> left,
                        Pair<Map<String, Set<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });

    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }

    // Generate more routing tables and keep the top ROUTING_TABLE_COUNT ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, Set<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(
                routingTableGenerator);
        Pair<Map<String, Set<String>>, Float> worstRoutingTable = topRoutingTables.peek();

        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }

    // Return the best routing tables
    List<ServerToSegmentSetMap> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        Pair<Map<String, Set<String>>, Float> routingTableWithMetric = topRoutingTables.poll();
        routingTables.add(new ServerToSegmentSetMap(routingTableWithMetric.getKey()));
    }

    return routingTables;
}
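
The generator keeps the ROUTING_TABLE_COUNT best candidates in a max-heap (Float.compare is negated) so the worst kept table sits at peek() and is replaced whenever a better candidate appears. A self-contained, hypothetical sketch of that selection loop (lower metric is assumed to be better, as in the example above):

import java.util.Comparator;
import java.util.PriorityQueue;

public class KeepBestSketch {
    public static void main(String[] args) {
        final int keep = 3;
        // Max-heap on the metric: negate the comparison so the worst kept candidate is at the head.
        PriorityQueue<Float> best = new PriorityQueue<Float>(keep, new Comparator<Float>() {
            @Override
            public int compare(Float left, Float right) {
                return -Float.compare(left, right);
            }
        });

        float[] candidateMetrics = { 4.2f, 1.5f, 3.3f, 0.9f, 2.8f, 5.1f };
        for (float metric : candidateMetrics) {
            if (best.size() < keep) {
                best.add(metric);
            } else if (metric < best.peek()) { // better than the current worst: replace it
                best.poll();
                best.add(metric);
            }
        }
        System.out.println("Kept " + best.size() + " candidates: " + best); // the three lowest metrics
    }
}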

From source file:com.linkedin.pinot.broker.routing.builder.GeneratorBasedRoutingTableBuilder.java

@Override
public void computeRoutingTableFromExternalView(String tableName, ExternalView externalView,
        List<InstanceConfig> instanceConfigs) {
    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a server with fewer segments.
    //
    // Then, we pick a server with least segments already assigned for each segment. This ensures that we build a
    // routing table that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //   a) Pick _targetNumServersPerQuery distinct servers
    //   b) Check if the server set covers all the segments; if not, add additional servers until it does
    //   c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //   d) For each segment, pick a server with least segments already assigned
    //   e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.

    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigs);

    PriorityQueue<Pair<Map<String, List<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, List<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, List<String>>, Float> left,
                        Pair<Map<String, List<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });

    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }

    // Generate more routing tables and keep the top ROUTING_TABLE_COUNT ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, List<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(
                routingTableGenerator);
        Pair<Map<String, List<String>>, Float> worstRoutingTable = topRoutingTables.peek();

        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }

    // Return the best routing tables
    List<Map<String, List<String>>> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        routingTables.add(topRoutingTables.poll().getKey());
    }

    setRoutingTables(routingTables);
}

From source file:com.joliciel.talismane.parser.TransitionBasedParserImpl.java

@Override
public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences) {
    MONITOR.startTask("parseSentence");
    try {
        long startTime = (new Date()).getTime();
        int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000;
        int minFreeMemoryBytes = minFreeMemory * KILOBYTE;

        TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence();

        TreeMap<Integer, PriorityQueue<ParseConfiguration>> heaps = new TreeMap<Integer, PriorityQueue<ParseConfiguration>>();

        PriorityQueue<ParseConfiguration> heap0 = new PriorityQueue<ParseConfiguration>();
        for (PosTagSequence posTagSequence : posTagSequences) {
            // add an initial ParseConfiguration for each postag sequence
            ParseConfiguration initialConfiguration = this.getParserServiceInternal()
                    .getInitialConfiguration(posTagSequence);
            initialConfiguration.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(initialConfiguration);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding initial posTagSequence: " + posTagSequence);
            }
        }
        heaps.put(0, heap0);
        PriorityQueue<ParseConfiguration> backupHeap = null;

        PriorityQueue<ParseConfiguration> finalHeap = null;
        PriorityQueue<ParseConfiguration> terminalHeap = new PriorityQueue<ParseConfiguration>();
        while (heaps.size() > 0) {
            Entry<Integer, PriorityQueue<ParseConfiguration>> heapEntry = heaps.pollFirstEntry();
            PriorityQueue<ParseConfiguration> currentHeap = heapEntry.getValue();
            int currentHeapIndex = heapEntry.getKey();
            if (LOG.isTraceEnabled()) {
                LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: "
                        + heapEntry.getValue().size());
            }

            boolean finished = false;
            // systematically set the final heap here, just in case we exit "naturally" with no more heaps
            finalHeap = heapEntry.getValue();
            backupHeap = new PriorityQueue<ParseConfiguration>();

            // we jump out when either (a) all tokens have been attached or (b) we go over the max allotted time
            ParseConfiguration topConf = currentHeap.peek();
            if (topConf.isTerminal()) {
                LOG.trace("Exiting with terminal heap: " + heapEntry.getKey() + ", size: "
                        + heapEntry.getValue().size());
                finished = true;
            }

            if (earlyStop && terminalHeap.size() >= beamWidth) {
                LOG.debug(
                        "Early stop activated and terminal heap contains " + beamWidth + " entries. Exiting.");
                finalHeap = terminalHeap;
                finished = true;
            }

            long analysisTime = (new Date()).getTime() - startTime;
            if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) {
                LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText());
                LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds.");
                finished = true;
            }

            if (minFreeMemory > 0) {
                long freeMemory = Runtime.getRuntime().freeMemory();
                if (freeMemory < minFreeMemoryBytes) {
                    LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText());
                    LOG.info("Min free memory (bytes):" + minFreeMemoryBytes);
                    LOG.info("Current free memory (bytes): " + freeMemory);
                    finished = true;
                }
            }

            if (finished) {
                break;
            }

            // limit the breadth to K
            int maxSequences = currentHeap.size() > this.beamWidth ? this.beamWidth : currentHeap.size();

            int j = 0;
            while (currentHeap.size() > 0) {
                ParseConfiguration history = currentHeap.poll();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":");
                    LOG.trace(history.toString());
                    LOG.trace("Score: " + df.format(history.getScore()));
                    LOG.trace(history.getPosTagSequence());
                }

                List<Decision<Transition>> decisions = new ArrayList<Decision<Transition>>();

                // test the positive rules on the current configuration
                boolean ruleApplied = false;
                if (parserPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (ParserRule rule : parserPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.toString());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<Transition> positiveRuleDecision = TalismaneSession
                                        .getTransitionSystem().createDefaultDecision(rule.getTransition());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting transition to: "
                                            + rule.getTransition().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current configuration
                    List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("feature analyse");
                    try {
                        for (ParseConfigurationFeature<?> feature : this.parseFeatures) {
                            MONITOR.startTask(feature.getName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(history, env);
                                if (featureResult != null)
                                    parseFeatureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(feature.getName());
                            }
                        }
                        if (LOG_FEATURES.isTraceEnabled()) {
                            for (FeatureResult<?> featureResult : parseFeatureResults) {
                                LOG_FEATURES.trace(featureResult.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("feature analyse");
                    }

                    // evaluate the feature results using the decision maker
                    MONITOR.startTask("make decision");
                    try {
                        decisions = this.decisionMaker.decide(parseFeatureResults);

                        for (ClassificationObserver<Transition> observer : this.observers) {
                            observer.onAnalyse(history, parseFeatureResults, decisions);
                        }

                        List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>(
                                decisions.size());
                        for (Decision<Transition> decision : decisions) {
                            if (decision.getProbability() > MIN_PROB_TO_STORE)
                                decisionShortList.add(decision);
                        }
                        decisions = decisionShortList;
                    } finally {
                        MONITOR.endTask("make decision");
                    }

                    // apply the negative rules
                    Set<Transition> eliminatedTransitions = new HashSet<Transition>();
                    if (parserNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (ParserRule rule : parserNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.toString());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedTransitions.addAll(rule.getTransitions());
                                    if (LOG.isTraceEnabled()) {
                                        for (Transition eliminatedTransition : rule.getTransitions())
                                            LOG.trace("Rule applies. Eliminating transition: "
                                                    + eliminatedTransition.getCode());
                                    }
                                }
                            }

                            if (eliminatedTransitions.size() > 0) {
                                List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>();
                                for (Decision<Transition> decision : decisions) {
                                    if (!eliminatedTransitions.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }
                } // has a positive rule been applied?

                boolean transitionApplied = false;
                // add new configuration to the heap, one for each valid transition
                MONITOR.startTask("heap sort");
                try {
                    // Why apply all decisions here? Why not just the top N (where N = beamwidth)?
                    // Answer: because we're not always adding solutions to the same heap
                    // And yet: a decision here can only do one of two things: process a token (heap+1000), or add a non-processing transition (heap+1)
                    // So, if we've already applied N decisions of each type, we should be able to stop
                    for (Decision<Transition> decision : decisions) {
                        Transition transition = decision.getOutcome();
                        if (LOG.isTraceEnabled())
                            LOG.trace("Outcome: " + transition.getCode() + ", " + decision.getProbability());

                        if (transition.checkPreconditions(history)) {
                            transitionApplied = true;
                            ParseConfiguration configuration = this.parserServiceInternal
                                    .getConfiguration(history);
                            if (decision.isStatistical())
                                configuration.addDecision(decision);
                            transition.apply(configuration);

                            int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration)
                                    * 1000;
                            if (configuration.isTerminal()) {
                                nextHeapIndex = Integer.MAX_VALUE;
                            } else {
                                while (nextHeapIndex <= currentHeapIndex)
                                    nextHeapIndex++;
                            }

                            PriorityQueue<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex);
                            if (nextHeap == null) {
                                if (configuration.isTerminal())
                                    nextHeap = terminalHeap;
                                else
                                    nextHeap = new PriorityQueue<ParseConfiguration>();
                                heaps.put(nextHeapIndex, nextHeap);
                                if (LOG.isTraceEnabled())
                                    LOG.trace("Created heap with index: " + nextHeapIndex);
                            }
                            nextHeap.add(configuration);
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Added configuration with score " + configuration.getScore()
                                        + " to heap: " + nextHeapIndex + ", total size: " + nextHeap.size());
                            }

                            configuration.clearMemory();
                        } else {
                            if (LOG.isTraceEnabled())
                                LOG.trace("Cannot apply transition: doesn't meet pre-conditions");
                            // just in case we run out of both heaps and analyses, we build this backup heap
                            backupHeap.add(history);
                        } // does transition meet pre-conditions?
                    } // next transition
                } finally {
                    MONITOR.endTask("heap sort");
                }

                if (transitionApplied) {
                    j++;
                } else {
                    LOG.trace("No transitions could be applied: not counting this history as part of the beam");
                }

                // beam width test
                if (j == maxSequences)
                    break;
            } // next history   
        } // next atomic index

        // return the best sequences on the heap
        List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>();
        int i = 0;

        if (finalHeap.isEmpty())
            finalHeap = backupHeap;

        while (!finalHeap.isEmpty()) {
            bestConfigurations.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }
        if (LOG.isDebugEnabled()) {
            for (ParseConfiguration finalConfiguration : bestConfigurations) {
                LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString());
                LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence());
                LOG.debug("Transitions: " + finalConfiguration.getTransitions());
                LOG.debug("Decisions: " + finalConfiguration.getDecisions());
                if (LOG.isTraceEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    for (Decision<Transition> decision : finalConfiguration.getDecisions()) {
                        sb.append(" * ");
                        sb.append(df.format(decision.getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getTransitions().size());
                    LOG.trace(sb.toString());

                    sb = new StringBuilder();
                    sb.append(" * PosTag sequence score ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getScore()));
                    sb.append(" = ");
                    for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) {
                        sb.append(" * ");
                        sb.append(df.format(posTaggedToken.getDecision().getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getPosTagSequence().size());
                    LOG.trace(sb.toString());

                    sb = new StringBuilder();
                    sb.append(" * Token sequence score = ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore()));
                    LOG.trace(sb.toString());

                }
            }
        }
        return bestConfigurations;
    } finally {
        MONITOR.endTask("parseSentence");
    }
}
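
The parser bounds the beam by polling at most min(currentHeap.size(), beamWidth) configurations from each heap. A reduced, hypothetical sketch of that breadth-limiting step (scores and the beam width are illustrative):

import java.util.PriorityQueue;

public class BeamWidthSketch {
    public static void main(String[] args) {
        int beamWidth = 2;
        PriorityQueue<Double> currentHeap = new PriorityQueue<Double>();
        currentHeap.add(0.9);
        currentHeap.add(0.4);
        currentHeap.add(0.7);

        // Expand only the first beamWidth entries from the heap; size() bounds the loop when the heap holds fewer.
        int maxSequences = currentHeap.size() > beamWidth ? beamWidth : currentHeap.size();
        for (int j = 0; j < maxSequences && currentHeap.size() > 0; j++) {
            Double best = currentHeap.poll();
            System.out.println("Expanding configuration with score " + best);
        }
    }
}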

From source file:com.joliciel.talismane.tokeniser.patterns.CompoundPatternTokeniser.java

@Override
public List<TokenisedAtomicTokenSequence> tokeniseWithDecisions(Sentence sentence) {
    MONITOR.startTask("tokeniseWithDecisions");
    try {
        // apply any pre-tokenisation decisions via filters
        // we only want one placeholder per start index - the first one that gets added
        Map<Integer, TokenPlaceholder> placeholderMap = new HashMap<Integer, TokenPlaceholder>();
        for (TokenFilter tokenFilter : this.tokenFilters) {
            Set<TokenPlaceholder> myPlaceholders = tokenFilter.apply(sentence.getText());
            for (TokenPlaceholder placeholder : myPlaceholders) {
                if (!placeholderMap.containsKey(placeholder.getStartIndex())) {
                    placeholderMap.put(placeholder.getStartIndex(), placeholder);
                }
            }
            if (LOG.isTraceEnabled()) {
                if (myPlaceholders.size() > 0) {
                    LOG.trace("TokenFilter: " + tokenFilter);
                    LOG.trace("placeholders: " + myPlaceholders);
                }
            }
        }

        Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(placeholderMap.values());

        // Initially, separate the sentence into tokens using the separators provided
        TokenSequence tokenSequence = this.tokeniserService.getTokenSequence(sentence, Tokeniser.SEPARATORS,
                placeholders);

        // apply any pre-processing filters that have been added
        for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
            tokenSequenceFilter.apply(tokenSequence);
        }

        // Assign each separator its default value
        List<TokeniserOutcome> defaultOutcomes = this.tokeniserPatternManager.getDefaultOutcomes(tokenSequence);
        List<Decision<TokeniserOutcome>> defaultDecisions = new ArrayList<Decision<TokeniserOutcome>>(
                defaultOutcomes.size());
        for (TokeniserOutcome outcome : defaultOutcomes) {
            Decision<TokeniserOutcome> tokeniserDecision = this.tokeniserDecisionFactory
                    .createDefaultDecision(outcome);
            tokeniserDecision.addAuthority("_" + this.getClass().getSimpleName());
            tokeniserDecision.addAuthority("_" + "DefaultDecision");
            defaultDecisions.add(tokeniserDecision);
        }

        List<TokenisedAtomicTokenSequence> sequences = null;

        // For each test pattern, see if anything in the sentence matches it
        if (this.decisionMaker != null) {
            List<TokenPatternMatchSequence> matchingSequences = new ArrayList<TokenPatternMatchSequence>();
            Map<Token, Set<TokenPatternMatchSequence>> tokenMatchSequenceMap = new HashMap<Token, Set<TokenPatternMatchSequence>>();
            Map<TokenPatternMatchSequence, TokenPatternMatch> primaryMatchMap = new HashMap<TokenPatternMatchSequence, TokenPatternMatch>();
            Set<Token> matchedTokens = new HashSet<Token>();

            MONITOR.startTask("pattern matching");
            try {
                for (TokenPattern parsedPattern : this.getTokeniserPatternManager().getParsedTestPatterns()) {
                    List<TokenPatternMatchSequence> matchesForThisPattern = parsedPattern.match(tokenSequence);
                    for (TokenPatternMatchSequence matchSequence : matchesForThisPattern) {
                        matchingSequences.add(matchSequence);
                        matchedTokens.addAll(matchSequence.getTokensToCheck());

                        TokenPatternMatch primaryMatch = null;
                        Token token = matchSequence.getTokensToCheck().get(0);

                        Set<TokenPatternMatchSequence> matchSequences = tokenMatchSequenceMap.get(token);
                        if (matchSequences == null) {
                            matchSequences = new TreeSet<TokenPatternMatchSequence>();
                            tokenMatchSequenceMap.put(token, matchSequences);
                        }
                        matchSequences.add(matchSequence);

                        for (TokenPatternMatch patternMatch : matchSequence.getTokenPatternMatches()) {
                            if (patternMatch.getToken().equals(token)) {
                                primaryMatch = patternMatch;
                                break;
                            }
                        }

                        if (LOG.isTraceEnabled()) {
                            LOG.trace("Found match: " + primaryMatch);
                        }
                        primaryMatchMap.put(matchSequence, primaryMatch);
                    }
                }
            } finally {
                MONITOR.endTask("pattern matching");
            }

            // we want to create the n most likely token sequences
            // the sequence has to correspond to a token pattern
            Map<TokenPatternMatchSequence, List<Decision<TokeniserOutcome>>> matchSequenceDecisionMap = new HashMap<TokenPatternMatchSequence, List<Decision<TokeniserOutcome>>>();

            for (TokenPatternMatchSequence matchSequence : matchingSequences) {
                TokenPatternMatch match = primaryMatchMap.get(matchSequence);
                LOG.debug("next pattern match: " + match.toString());
                List<FeatureResult<?>> tokenFeatureResults = new ArrayList<FeatureResult<?>>();
                MONITOR.startTask("analyse features");
                try {
                    for (TokenPatternMatchFeature<?> feature : features) {
                        RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                        FeatureResult<?> featureResult = feature.check(match, env);
                        if (featureResult != null) {
                            tokenFeatureResults.add(featureResult);
                        }
                    }

                    if (LOG.isTraceEnabled()) {
                        for (FeatureResult<?> featureResult : tokenFeatureResults) {
                            LOG.trace(featureResult.toString());
                        }
                    }
                } finally {
                    MONITOR.endTask("analyse features");
                }

                List<Decision<TokeniserOutcome>> decisions = null;
                MONITOR.startTask("make decision");
                try {
                    decisions = this.decisionMaker.decide(tokenFeatureResults);

                    for (ClassificationObserver<TokeniserOutcome> observer : this.observers)
                        observer.onAnalyse(match.getToken(), tokenFeatureResults, decisions);

                    for (Decision<TokeniserOutcome> decision : decisions) {
                        decision.addAuthority("_" + this.getClass().getSimpleName());
                        decision.addAuthority("_" + "Patterns");
                        decision.addAuthority(match.getPattern().getName());
                    }
                } finally {
                    MONITOR.endTask("make decision");
                }

                matchSequenceDecisionMap.put(matchSequence, decisions);
            }

            // initially create a heap with a single, empty sequence
            PriorityQueue<TokenisedAtomicTokenSequence> heap = new PriorityQueue<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence emptySequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            heap.add(emptySequence);

            for (int i = 0; i < tokenSequence.listWithWhiteSpace().size(); i++) {
                Token token = tokenSequence.listWithWhiteSpace().get(i);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Token : \"" + token.getText() + "\"");
                }

                // build a new heap for this iteration
                PriorityQueue<TokenisedAtomicTokenSequence> previousHeap = heap;
                heap = new PriorityQueue<TokenisedAtomicTokenSequence>();

                if (i == 0) {
                    // first token is always "separate" from the outside world
                    Decision<TokeniserOutcome> decision = this.tokeniserDecisionFactory
                            .createDefaultDecision(TokeniserOutcome.SEPARATE);
                    decision.addAuthority("_" + this.getClass().getSimpleName());
                    decision.addAuthority("_" + "DefaultDecision");

                    TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                            decision);

                    TokenisedAtomicTokenSequence newSequence = this.getTokeniserService()
                            .getTokenisedAtomicTokenSequence(emptySequence);
                    newSequence.add(taggedToken);
                    heap.add(newSequence);
                    continue;
                }

                // limit the heap breadth to K
                int maxSequences = previousHeap.size() > this.getBeamWidth() ? this.getBeamWidth()
                        : previousHeap.size();
                MONITOR.startTask("heap sort");
                try {
                    for (int j = 0; j < maxSequences; j++) {
                        TokenisedAtomicTokenSequence history = previousHeap.poll();

                        // Find the separating & non-separating decisions
                        if (history.size() > i) {
                            // token already added as part of a sequence introduced by another token
                            heap.add(history);
                        } else if (tokenMatchSequenceMap.containsKey(token)) {
                            // token begins one or more match sequences
                            // these are ordered from shortest to longest (via TreeSet)
                            List<TokenPatternMatchSequence> matchSequences = new ArrayList<TokenPatternMatchSequence>(
                                    tokenMatchSequenceMap.get(token));

                            // Since sequences P1..Pn contain each other,
                            // there can be exactly matchSequences.size() consistent solutions
                            // Assume the default is separate
                            // 0: all separate
                            // 1: join P1, separate rest
                            // 2: join P2, separate rest
                            // ...
                            // n: join Pn
                            // We need to add each of these to the heap
                            // by taking the product of all probabilities consistent with each solution
                            // The probabilities for each solution are (j=join, s=separate)
                            // All separate: s1 x s2 x ... x sn
                            // P1: j1 x s2 x ... x sn
                            // P2: j1 x j2 x ... x sn
                            // ...
                            // Pn: j1 x j2 x ... x jn
                            // Any solution of the form s1 x j2 would be inconsistent, and is not considered
                            // If Pi and Pj start and end on the exact same token, then the solution for both is
                            // Pi: j1 x ... x ji x jj x sj+1 ... x sn
                            // Pj: j1 x ... x ji x jj x sj+1 ... x sn
                            // Note of course that we're never likely to have more than two Ps here,
                            // but we need a solution for more just to be sure to be sure
                            TokeniserOutcome defaultOutcome = defaultDecisions
                                    .get(token.getIndexWithWhiteSpace()).getOutcome();
                            TokeniserOutcome otherOutcome = null;
                            if (defaultOutcome == TokeniserOutcome.SEPARATE)
                                otherOutcome = TokeniserOutcome.JOIN;
                            else
                                otherOutcome = TokeniserOutcome.SEPARATE;

                            double[] decisionProbs = new double[matchSequences.size() + 1];
                            for (int k = 0; k < decisionProbs.length; k++)
                                decisionProbs[k] = 1;

                            // Note: k0 = default decision (e.g. separate all), k1=first pattern
                            // p1 = first pattern
                            int p = 1;
                            int prevEndIndex = -1;
                            for (TokenPatternMatchSequence matchSequence : matchSequences) {
                                int endIndex = matchSequence.getTokensToCheck()
                                        .get(matchSequence.getTokensToCheck().size() - 1).getEndIndex();
                                List<Decision<TokeniserOutcome>> decisions = matchSequenceDecisionMap
                                        .get(matchSequence);
                                for (Decision<TokeniserOutcome> decision : decisions) {
                                    for (int k = 0; k < decisionProbs.length; k++) {
                                        if (decision.getOutcome() == defaultOutcome) {
                                            // e.g. separate in most cases
                                            if (k < p && endIndex > prevEndIndex)
                                                decisionProbs[k] *= decision.getProbability();
                                            else if (k + 1 < p && endIndex <= prevEndIndex)
                                                decisionProbs[k] *= decision.getProbability();
                                        } else {
                                            // e.g. join in most cases
                                            if (k >= p && endIndex > prevEndIndex)
                                                decisionProbs[k] *= decision.getProbability();
                                            else if (k + 1 >= p && endIndex <= prevEndIndex)
                                                decisionProbs[k] *= decision.getProbability();
                                        }
                                    } // next k
                                } // next decision (only 2 of these)
                                prevEndIndex = endIndex;
                                p++;
                            }

                            // transform to probability distribution
                            double sumProbs = 0;
                            for (int k = 0; k < decisionProbs.length; k++)
                                sumProbs += decisionProbs[k];

                            if (sumProbs > 0)
                                for (int k = 0; k < decisionProbs.length; k++)
                                    decisionProbs[k] /= sumProbs;

                            // Apply default decision
                            // Since this is the default decision for all tokens in the sequence, we don't add the other tokens for now,
                            // so as to allow them
                            // to get examined one at a time, just in case one of them starts its own separate sequence
                            Decision<TokeniserOutcome> defaultDecision = this.tokeniserDecisionFactory
                                    .createDecision(defaultOutcome.getCode(), decisionProbs[0]);
                            defaultDecision.addAuthority("_" + this.getClass().getSimpleName());
                            defaultDecision.addAuthority("_" + "Patterns");
                            for (TokenPatternMatchSequence matchSequence : matchSequences) {
                                defaultDecision.addAuthority(matchSequence.getTokenPattern().getName());
                            }

                            TaggedToken<TokeniserOutcome> defaultTaggedToken = this.tokeniserService
                                    .getTaggedToken(token, defaultDecision);
                            TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService()
                                    .getTokenisedAtomicTokenSequence(history);
                            defaultSequence.add(defaultTaggedToken);
                            defaultSequence.addDecision(defaultDecision);
                            heap.add(defaultSequence);

                            // Apply one non-default decision per match sequence
                            for (int k = 0; k < matchSequences.size(); k++) {
                                TokenPatternMatchSequence matchSequence = matchSequences.get(k);
                                double prob = decisionProbs[k + 1];
                                Decision<TokeniserOutcome> decision = this.tokeniserDecisionFactory
                                        .createDecision(otherOutcome.getCode(), prob);
                                decision.addAuthority("_" + this.getClass().getSimpleName());
                                decision.addAuthority("_" + "Patterns");
                                decision.addAuthority(matchSequence.getTokenPattern().getName());

                                TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService
                                        .getTaggedToken(token, decision);

                                TokenisedAtomicTokenSequence newSequence = this.getTokeniserService()
                                        .getTokenisedAtomicTokenSequence(history);
                                newSequence.add(taggedToken);
                                newSequence.addDecision(decision);

                                // The decision is NOT the default decision for all tokens in the sequence,
                                // so add all other tokens in this sequence to the solution
                                for (Token tokenInSequence : matchSequence.getTokensToCheck()) {
                                    if (tokenInSequence.equals(token)) {
                                        continue;
                                    }
                                    Decision<TokeniserOutcome> decisionInSequence = this.tokeniserDecisionFactory
                                            .createDefaultDecision(decision.getOutcome());
                                    decisionInSequence.addAuthority("_" + this.getClass().getSimpleName());
                                    decisionInSequence.addAuthority("_" + "DecisionInSequence");
                                    decisionInSequence.addAuthority("_" + "DecisionInSequence_non_default");
                                    decisionInSequence.addAuthority("_" + "Patterns");
                                    TaggedToken<TokeniserOutcome> taggedTokenInSequence = this.tokeniserService
                                            .getTaggedToken(tokenInSequence, decisionInSequence);
                                    newSequence.add(taggedTokenInSequence);
                                }

                                heap.add(newSequence);

                            } // next sequence
                        } else {
                            // token doesn't start a match sequence and hasn't already been added to the current sequence
                            Decision<TokeniserOutcome> decision = defaultDecisions.get(i);
                            if (matchedTokens.contains(token)) {
                                decision = this.tokeniserDecisionFactory
                                        .createDefaultDecision(decision.getOutcome());
                                decision.addAuthority("_" + this.getClass().getSimpleName());
                                decision.addAuthority("_" + "DecisionInSequence");
                                decision.addAuthority("_" + "DecisionInSequence_default");
                                decision.addAuthority("_" + "Patterns");
                            }
                            TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService
                                    .getTaggedToken(token, decision);

                            TokenisedAtomicTokenSequence newSequence = this.getTokeniserService()
                                    .getTokenisedAtomicTokenSequence(history);
                            newSequence.add(taggedToken);
                            heap.add(newSequence);
                        }

                    } // next sequence in the old heap
                } finally {
                    MONITOR.endTask("heap sort");
                }
            } // next token

            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            int k = 0;
            while (!heap.isEmpty()) {
                sequences.add(heap.poll());
                k++;
                if (k >= this.getBeamWidth())
                    break;
            }
        } else {
            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                        defaultDecisions.get(i++));
                defaultSequence.add(taggedToken);
            }
            sequences.add(defaultSequence);
        } // have decision maker?

        LOG.debug("####Final token sequences:");
        int j = 1;
        for (TokenisedAtomicTokenSequence sequence : sequences) {
            TokenSequence newTokenSequence = sequence.inferTokenSequence();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Token sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Atomic sequence: " + sequence);
                LOG.debug("Resulting sequence: " + newTokenSequence);
            }
            // need to re-apply the pre-processing filters, because the tokens are all new
            // Question: why can't we conserve the initial tokens when they haven't changed at all?
            // Answer: because the tokenSequence and index in the sequence is referenced by the token.
            // Question: should we create a separate class, Token and TokenInSequence,
            // one with index & sequence access & one without?
            for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
                tokenSequenceFilter.apply(newTokenSequence);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("After filters:      " + newTokenSequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tokeniseWithDecisions");
    }
}
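
The loop above polls at most getBeamWidth() sequences off the heap and discards the rest. Below is a minimal, self-contained sketch of that best-first pruning pattern; ScoredSequence and prune are hypothetical stand-ins for illustration only, not part of the tokeniser API shown above.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

// Hypothetical scored item used only for this sketch.
class ScoredSequence implements Comparable<ScoredSequence> {
    final String label;
    final double score;

    ScoredSequence(String label, double score) {
        this.label = label;
        this.score = score;
    }

    // Highest score first, so the best candidate sits at the head of the heap.
    public int compareTo(ScoredSequence other) {
        return Double.compare(other.score, this.score);
    }
}

public class BeamPruneSketch {
    // Poll at most beamWidth items off the heap, mirroring the truncation loop above.
    static List<ScoredSequence> prune(PriorityQueue<ScoredSequence> heap, int beamWidth) {
        List<ScoredSequence> kept = new ArrayList<ScoredSequence>();
        while (!heap.isEmpty() && kept.size() < beamWidth) {
            kept.add(heap.poll());
        }
        return kept;
    }

    public static void main(String[] args) {
        PriorityQueue<ScoredSequence> heap = new PriorityQueue<ScoredSequence>();
        heap.add(new ScoredSequence("a", 0.2));
        heap.add(new ScoredSequence("b", 0.9));
        heap.add(new ScoredSequence("c", 0.5));
        for (ScoredSequence s : prune(heap, 2)) {
            System.out.println(s.label + " " + s.score); // prints b 0.9, then c 0.5
        }
    }
}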

From source file:mondrian.olap.fun.FunUtil.java

/**
 * Julian's algorithm for stable partial sort. Improves Pedro's algorithm
 * by using a heap (priority queue) for the top {@code limit} items seen.
 * The items on the priority queue have an ordinal field, so the queue
 * can be used to generate a list of stably sorted items. (Heap sort is
 * not normally stable.)
 *
 * @param list List to sort
 * @param comp Comparator
 * @param limit Maximum number of items to return
 * @param <T> Element type
 * @return Sorted list, containing at most limit items
 */
public static <T> List<T> stablePartialSortJulian(final List<T> list, final Comparator<T> comp, int limit) {
    final Comparator<ObjIntPair<T>> comp2 = new Comparator<ObjIntPair<T>>() {
        public int compare(ObjIntPair<T> o1, ObjIntPair<T> o2) {
            int c = comp.compare(o1.t, o2.t);
            if (c == 0) {
                c = Util.compare(o1.i, o2.i);
            }
            return -c;
        }
    };
    int filled = 0;
    final PriorityQueue<ObjIntPair<T>> queue = new PriorityQueue<ObjIntPair<T>>(limit, comp2);
    for (T element : list) {
        if (filled < limit) {
            queue.offer(new ObjIntPair<T>(element, filled++));
        } else {
            ObjIntPair<T> head = queue.element();
            if (comp.compare(element, head.t) <= 0) {
                ObjIntPair<T> item = new ObjIntPair<T>(element, filled++);
                if (comp2.compare(item, head) >= 0) {
                    ObjIntPair<T> poll = queue.remove();
                    Util.discard(poll);
                    queue.offer(item);
                }
            }
        }
    }

    int n = queue.size();
    final Object[] elements = new Object[n];
    while (n > 0) {
        elements[--n] = queue.poll().t;
    }
    assert queue.isEmpty();
    //noinspection unchecked
    return Arrays.asList((T[]) elements);
}
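
A simpler way to see the bounded-heap idea behind stablePartialSortJulian is to drive the fill/evict decision directly off PriorityQueue.size(). The sketch below keeps the limit smallest elements seen so far; it is a hedged simplification that drops the ordinal-based stability trick, and smallestN is an illustrative name rather than anything from FunUtil.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class TopNSketch {
    // Keep the `limit` smallest elements seen so far. The heap is reversed so the
    // largest retained element sits at the head and can be evicted cheaply.
    static <T> List<T> smallestN(Iterable<T> items, Comparator<T> comp, int limit) {
        PriorityQueue<T> queue = new PriorityQueue<T>(Math.max(1, limit), Collections.reverseOrder(comp));
        for (T item : items) {
            if (queue.size() < limit) {
                queue.offer(item);
            } else if (limit > 0 && comp.compare(item, queue.peek()) < 0) {
                queue.poll(); // evict the current worst
                queue.offer(item);
            }
        }
        List<T> result = new ArrayList<T>(queue);
        Collections.sort(result, comp);
        return result;
    }

    public static void main(String[] args) {
        List<Integer> data = Arrays.asList(5, 1, 4, 2, 3);
        System.out.println(smallestN(data, Comparator.<Integer>naturalOrder(), 3)); // [1, 2, 3]
    }
}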

From source file:org.apache.hadoop.hbase.extended.loadbalance.strategies.hotspot.HotSpotLoadBalancer.java

@Override
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    initParameters();
    /**
     * <pre>
     * We need at least two priority queues:
     * a) one containing hotspot regions with their load as the moving criterion (max priority queue)
     * b) one containing non-hotspot regions with their loads (min priority queue)
     * 
     * We then need to iterate over these queues and decrease the load, so we
     * need a data structure from which to build these queues,
     * and lastly we need to return the region plan.
     * </pre>
     */

    LOG.debug("#################Came in the new Balancer Code and the cluster status is = " + this.status);
    long startTime = System.currentTimeMillis();
    int numServers = clusterState.size();
    if (numServers == 0) {
        LOG.info("numServers=0 so skipping load balancing");
        return null;

    }

    NavigableMap<HotSpotServerAndLoad, List<HotSpotRegionLoad>> regionServerAndServerLoadMap = new TreeMap<HotSpotServerAndLoad, List<HotSpotRegionLoad>>();
    PriorityQueue<HotSpotServerAndLoad> hotspotRegionServers = new PriorityQueue<HotSpotServerAndLoad>(
            numServers, HotSpotServerAndLoad.DESC_LOAD);
    PriorityQueue<HotSpotServerAndLoad> nonHotspotRegionServers = new PriorityQueue<HotSpotServerAndLoad>(
            numServers, HotSpotServerAndLoad.ASC_LOAD);
    HashBiMap<HRegionInfo, HotSpotRegionLoad> allRegionsLoadBiMap = HashBiMap.create();
    LOG.debug("#################clusterState=" + clusterState);
    double normalisedTotalLoadOfAllRegions = initRegionLoadMapsBasedOnInput(clusterState,
            regionServerAndServerLoadMap, allRegionsLoadBiMap);
    LOG.debug("#################normalisedTotalLoadOfAllRegions=" + normalisedTotalLoadOfAllRegions);
    // Check if we even need to do any load balancing
    double average = normalisedTotalLoadOfAllRegions / numServers; // for logging
    // HBASE-3681 check sloppiness first
    LOG.debug("######################## final regionServerAndServerLoadMap == " + regionServerAndServerLoadMap);
    if (!loadBalancingNeeded(numServers, regionServerAndServerLoadMap, normalisedTotalLoadOfAllRegions,
            average)) {
        // we do not need load balancing
        return null;
    }
    double minLoad = normalisedTotalLoadOfAllRegions / numServers;
    double maxLoad = normalisedTotalLoadOfAllRegions % numServers == 0 ? minLoad : minLoad + 1;
    // as we now have to balance stuff, init PQ's
    LOG.debug(String.format("#################minLoad =%s,maxLoad= %s", minLoad, maxLoad));
    for (Map.Entry<HotSpotServerAndLoad, List<HotSpotRegionLoad>> item : regionServerAndServerLoadMap
            .entrySet()) {
        HotSpotServerAndLoad serverLoad = item.getKey();
        if (serverLoad.isHotSpot()) {

            hotspotRegionServers.add(serverLoad);
        } else {
            if (serverLoad.getLoad() < maxLoad) {
                nonHotspotRegionServers.add(serverLoad);
            }
        }
    }
    // Used to log the balance parameters for checking the result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(normalisedTotalLoadOfAllRegions)
            .append(", numServers=").append(numServers).append(", max=").append(maxLoad).append(", min=")
            .append(minLoad);
    LOG.debug(strBalanceParam.toString());
    List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();

    while (hotspotRegionServers.size() > 0 && nonHotspotRegionServers.size() > 0) {
        HotSpotServerAndLoad serverToBalance = hotspotRegionServers.poll();
        LOG.debug(String.format("#################serverToBalance =%s",
                serverToBalance.getServerName().getServerName()));
        // get the least-loaded non-hotspot regions of this server
        List<HotSpotRegionLoad> regionList = regionServerAndServerLoadMap.get(serverToBalance);
        // assumed to be sorted in ascending order
        if (regionList.size() > 0) {
            HotSpotRegionLoad regionToMove = regionList.remove(0);
            HRegionInfo regionMoveInfo = allRegionsLoadBiMap.inverse().get(regionToMove);

            /*
             * regionMoveInfo can be null when the load map returns the root
             * and meta regions along with the movable regions. Because the
             * clusterState passed to us does not contain these regions, the
             * regionServerAndServerLoadMap may contain regions that are not
             * present in allRegionsLoadBiMap.
             */
            if (regionMoveInfo != null && !regionMoveInfo.isMetaRegion() && !regionMoveInfo.isRootRegion()
                    && !regionMoveInfo.isMetaTable() && regionToMove.isRegionHotspot()) {
                LOG.debug(String.format(
                        "#################Came to move the region regionMoveInfo=%s;; regionToMove=%s ",
                        regionMoveInfo, regionToMove));
                // move out.
                HotSpotServerAndLoad destinationServer = nonHotspotRegionServers.poll();

                RegionPlan rpl = new RegionPlan(allRegionsLoadBiMap.inverse().get(regionToMove),
                        serverToBalance.getServerName(), destinationServer.getServerName());
                regionsToReturn.add(rpl);
                serverToBalance.modifyLoad(regionToMove.getLoad());
                destinationServer.modifyLoad(-1 * regionToMove.getLoad());
                // re-enter them into the queues if they still satisfy the conditions
                if (serverToBalance.getLoad() > minLoad) {
                    hotspotRegionServers.offer(serverToBalance);
                }
                if (destinationServer.getLoad() < maxLoad) {
                    nonHotspotRegionServers.offer(destinationServer);
                }
            }
        }
    }
    LOG.info("Total Time taken to balance = " + (System.currentTimeMillis() - startTime));
    LOG.info(String.format("#################regionsToReturn=%s ", regionsToReturn));
    return regionsToReturn;

}
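
The while loop above keeps moving regions as long as both the hotspot and non-hotspot queues report a non-zero size(). The following is a minimal sketch of that two-queue drain pattern under simplified assumptions (one unit of load per move, a single targetLoad threshold); ServerLoad and balance are hypothetical stand-ins for the HotSpotServerAndLoad machinery above.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

// Hypothetical server record used only for this sketch.
class ServerLoad {
    final String name;
    double load;

    ServerLoad(String name, double load) {
        this.name = name;
        this.load = load;
    }
}

public class TwoQueueBalanceSketch {
    // Move one unit of load at a time from the most loaded server to the least
    // loaded one, draining both queues until either is empty.
    static List<String> balance(List<ServerLoad> servers, double targetLoad) {
        int capacity = Math.max(1, servers.size());
        PriorityQueue<ServerLoad> overloaded = new PriorityQueue<ServerLoad>(capacity,
                Comparator.comparingDouble((ServerLoad s) -> s.load).reversed());
        PriorityQueue<ServerLoad> underloaded = new PriorityQueue<ServerLoad>(capacity,
                Comparator.comparingDouble((ServerLoad s) -> s.load));
        for (ServerLoad s : servers) {
            if (s.load > targetLoad) {
                overloaded.add(s);
            } else {
                underloaded.add(s);
            }
        }
        List<String> moves = new ArrayList<String>();
        while (overloaded.size() > 0 && underloaded.size() > 0) {
            ServerLoad source = overloaded.poll();
            ServerLoad dest = underloaded.poll();
            source.load -= 1.0;
            dest.load += 1.0;
            moves.add(source.name + " -> " + dest.name);
            // Re-enter the queues only while each server still qualifies.
            if (source.load > targetLoad) {
                overloaded.offer(source);
            }
            if (dest.load < targetLoad) {
                underloaded.offer(dest);
            }
        }
        return moves;
    }

    public static void main(String[] args) {
        List<ServerLoad> servers = new ArrayList<ServerLoad>();
        servers.add(new ServerLoad("rs1", 5.0));
        servers.add(new ServerLoad("rs2", 1.0));
        System.out.println(balance(servers, 3.0)); // [rs1 -> rs2, rs1 -> rs2]
    }
}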