Example usage for java.util PriorityQueue add

List of usage examples for java.util PriorityQueue add

Introduction

On this page you can find usage examples for java.util PriorityQueue add.

Prototype

public boolean add(E e) 

Document

Inserts the specified element into this priority queue.
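
A minimal, self-contained illustration of the call (not taken from the projects below): for the unbounded java.util.PriorityQueue, add behaves exactly like offer and always returns true, and elements come back out in priority order rather than insertion order.

import java.util.PriorityQueue;

public class PriorityQueueAddExample {
    public static void main(String[] args) {
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>();

        // add() inserts the element and restores the heap invariant; for this
        // unbounded queue it behaves like offer() and returns true.
        queue.add(42);
        queue.add(7);
        queue.add(19);

        // Elements come back in priority (natural) order, not insertion order.
        while (!queue.isEmpty()) {
            System.out.println(queue.poll()); // prints 7, then 19, then 42
        }
    }
}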

Usage

From source file:com.linkedin.pinot.routing.builder.GeneratorBasedRoutingTableBuilder.java

@Override
public List<ServerToSegmentSetMap> computeRoutingTableFromExternalView(String tableName,
        ExternalView externalView, List<InstanceConfig> instanceConfigList) {
    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a replica with fewer segments.
    //
    // Then, we pick a random replica for each segment, iterating from fewest replicas to most replicas, inversely
    // weighted by the number of segments already assigned to that replica. This ensures that we build a routing table
    // that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //   a) Pick TARGET_SERVER_COUNT_PER_QUERY distinct servers
    //   b) Check if the server set covers all the segments; if not, add additional servers until it does.
    //   c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //   d) For each segment, pick a random replica with proper weighting
    //   e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.

    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigList);

    PriorityQueue<Pair<Map<String, Set<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, Set<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, Set<String>>, Float> left,
                        Pair<Map<String, Set<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });

    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }

    // Generate more routing tables and keep the top ROUTING_TABLE_COUNT ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, Set<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(
                routingTableGenerator);
        Pair<Map<String, Set<String>>, Float> worstRoutingTable = topRoutingTables.peek();

        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }

    // Return the best routing tables
    List<ServerToSegmentSetMap> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        Pair<Map<String, Set<String>>, Float> routingTableWithMetric = topRoutingTables.poll();
        routingTables.add(new ServerToSegmentSetMap(routingTableWithMetric.getKey()));
    }

    return routingTables;
}
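
The example above keeps the best ROUTING_TABLE_COUNT tables in a max heap: the head is always the worst survivor, so a new candidate only enters if it beats it. A simplified sketch of that keep-the-best-N pattern (the class name and the cost stream are hypothetical, not part of the Pinot code):

import java.util.Collections;
import java.util.PriorityQueue;

class KeepBestSketch {
    // Keep the n lowest-cost candidates seen so far. With a reversed comparator
    // the head is the *worst* survivor, so a better candidate replaces it.
    static PriorityQueue<Float> keepBest(float[] costs, int n) {
        PriorityQueue<Float> worstOnTop = new PriorityQueue<Float>(n, Collections.reverseOrder());
        for (float cost : costs) {
            if (worstOnTop.size() < n) {
                worstOnTop.add(cost);                // still filling the pool
            } else if (cost < worstOnTop.peek()) {   // beats the current worst
                worstOnTop.poll();
                worstOnTop.add(cost);
            }
        }
        return worstOnTop;
    }
}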

From source file:org.apache.hadoop.hbase.io.hfile.LruBlockCache.java

/**
 * Eviction method.
 */
void evict() {

    // Ensure only one eviction at a time
    if (!evictionLock.tryLock())
        return;

    try {
        evictionInProgress = true;
        long currentSize = this.size.get();
        long bytesToFree = currentSize - minSize();

        if (LOG.isTraceEnabled()) {
            LOG.trace("Block cache LRU eviction started; Attempting to free "
                    + StringUtils.byteDesc(bytesToFree) + " of total=" + StringUtils.byteDesc(currentSize));
        }

        if (bytesToFree <= 0)
            return;

        // Instantiate priority buckets
        BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize, singleSize());
        BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize, multiSize());
        BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize, memorySize());

        // Scan entire map putting into appropriate buckets
        for (CachedBlock cachedBlock : map.values()) {
            switch (cachedBlock.getPriority()) {
            case SINGLE: {
                bucketSingle.add(cachedBlock);
                break;
            }
            case MULTI: {
                bucketMulti.add(cachedBlock);
                break;
            }
            case MEMORY: {
                bucketMemory.add(cachedBlock);
                break;
            }
            }
        }

        long bytesFreed = 0;
        if (forceInMemory || memoryFactor > 0.999f) {
            long s = bucketSingle.totalSize();
            long m = bucketMulti.totalSize();
            if (bytesToFree > (s + m)) {
                // this means we need to evict blocks in memory bucket to make room,
                // so the single and multi buckets will be emptied
                bytesFreed = bucketSingle.free(s);
                bytesFreed += bucketMulti.free(m);
                bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
            } else {
                // this means no need to evict block in memory bucket,
                // and we try best to make the ratio between single-bucket and
                // multi-bucket is 1:2
                long bytesRemain = s + m - bytesToFree;
                if (3 * s <= bytesRemain) {
                    // single-bucket is small enough that no eviction happens for it
                    // hence all eviction goes from multi-bucket
                    bytesFreed = bucketMulti.free(bytesToFree);
                } else if (3 * m <= 2 * bytesRemain) {
                    // multi-bucket is small enough that no eviction happens for it
                    // hence all eviction goes from single-bucket
                    bytesFreed = bucketSingle.free(bytesToFree);
                } else {
                    // both buckets need to evict some blocks
                    bytesFreed = bucketSingle.free(s - bytesRemain / 3);
                    if (bytesFreed < bytesToFree) {
                        bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
                    }
                }
            }
        } else {
            PriorityQueue<BlockBucket> bucketQueue = new PriorityQueue<BlockBucket>(3);

            bucketQueue.add(bucketSingle);
            bucketQueue.add(bucketMulti);
            bucketQueue.add(bucketMemory);

            int remainingBuckets = 3;

            BlockBucket bucket;
            while ((bucket = bucketQueue.poll()) != null) {
                long overflow = bucket.overflow();
                if (overflow > 0) {
                    long bucketBytesToFree = Math.min(overflow, (bytesToFree - bytesFreed) / remainingBuckets);
                    bytesFreed += bucket.free(bucketBytesToFree);
                }
                remainingBuckets--;
            }
        }

        if (LOG.isTraceEnabled()) {
            long single = bucketSingle.totalSize();
            long multi = bucketMulti.totalSize();
            long memory = bucketMemory.totalSize();
            LOG.trace("Block cache LRU eviction completed; " + "freed=" + StringUtils.byteDesc(bytesFreed)
                    + ", " + "total=" + StringUtils.byteDesc(this.size.get()) + ", " + "single="
                    + StringUtils.byteDesc(single) + ", " + "multi=" + StringUtils.byteDesc(multi) + ", "
                    + "memory=" + StringUtils.byteDesc(memory));
        }
    } finally {
        stats.evict();
        evictionInProgress = false;
        evictionLock.unlock();
    }
}
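
The bucketQueue branch above relies on BlockBucket's natural ordering to decide which bucket is drained first. A stripped-down sketch of the same add-then-poll loop, with a hypothetical Bucket type ordered by its overflow:

import java.util.PriorityQueue;

class BucketDrainSketch {
    // A stand-in for BlockBucket: natural ordering decides which bucket is
    // polled first.
    static class Bucket implements Comparable<Bucket> {
        final long overflow;
        Bucket(long overflow) { this.overflow = overflow; }
        @Override public int compareTo(Bucket other) { return Long.compare(overflow, other.overflow); }
    }

    public static void main(String[] args) {
        PriorityQueue<Bucket> bucketQueue = new PriorityQueue<Bucket>(3);
        bucketQueue.add(new Bucket(30));
        bucketQueue.add(new Bucket(10));
        bucketQueue.add(new Bucket(20));
        Bucket bucket;
        while ((bucket = bucketQueue.poll()) != null) {
            System.out.println(bucket.overflow); // 10, 20, 30
        }
    }
}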

From source file:edu.snu.leader.hierarchy.simple.Individual.java

/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
private void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<Individual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        Individual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}
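
The pattern here is add-everything-then-poll-k: since Neighbor is ordered by distance, the first k polls yield the k nearest. The same idea reduced to plain distances (a hypothetical helper, not part of the simulation code):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

class NearestNeighborsSketch {
    // Add every candidate distance, then poll k times: the queue returns the k
    // smallest values without fully sorting the input.
    static List<Double> kSmallest(List<Double> distances, int k) {
        PriorityQueue<Double> queue = new PriorityQueue<Double>();
        for (Double distance : distances) {
            queue.add(distance);
        }
        List<Double> nearest = new ArrayList<Double>();
        int count = Math.min(k, queue.size());
        for (int i = 0; i < count; i++) {
            nearest.add(queue.poll());
        }
        return nearest;
    }
}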

From source file:com.linkedin.pinot.broker.routing.builder.GeneratorBasedRoutingTableBuilder.java

@Override
public void computeRoutingTableFromExternalView(String tableName, ExternalView externalView,
        List<InstanceConfig> instanceConfigs) {
    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a server with fewer segments.
    //
    // Then, we pick a server with least segments already assigned for each segment. This ensures that we build a
    // routing table that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //   a) Pick _targetNumServersPerQuery distinct servers
    //   b) Check if the server set covers all the segments; if not, add additional servers until it does
    //   c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //   d) For each segment, pick a server with least segments already assigned
    //   e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.

    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigs);

    PriorityQueue<Pair<Map<String, List<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, List<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, List<String>>, Float> left,
                        Pair<Map<String, List<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });

    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }

    // Generate more routing tables and keep the top ROUTING_TABLE_COUNT ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, List<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(
                routingTableGenerator);
        Pair<Map<String, List<String>>, Float> worstRoutingTable = topRoutingTables.peek();

        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }

    // Return the best routing tables
    List<Map<String, List<String>>> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        routingTables.add(topRoutingTables.poll().getKey());
    }

    setRoutingTables(routingTables);
}

From source file:com.joliciel.talismane.posTagger.PosTaggerImpl.java

@Override
public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) {
    MONITOR.startTask("tagSentence");
    try {
        MONITOR.startTask("apply filters");
        try {
            for (TokenSequence tokenSequence : tokenSequences) {
                for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) {
                    tokenFilter.apply(tokenSequence);
                }
            }
        } finally {
            MONITOR.endTask("apply filters");
        }
        int sentenceLength = tokenSequences.get(0).getText().length();

        TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>();

        PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>();
        for (TokenSequence tokenSequence : tokenSequences) {
            // add an empty PosTagSequence for each token sequence
            PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0);
            emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(emptySequence);
        }
        heaps.put(0.0, heap0);

        PriorityQueue<PosTagSequence> finalHeap = null;
        while (heaps.size() > 0) {
            Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry();
            if (LOG.isTraceEnabled()) {
                LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength);
            }
            if (heapEntry.getKey() == sentenceLength) {
                finalHeap = heapEntry.getValue();
                break;
            }
            PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue();

            // limit the breadth to K
            int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();

            for (int j = 0; j < maxSequences; j++) {
                PosTagSequence history = previousHeap.poll();
                Token token = history.getNextToken();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString());
                    LOG.trace("Prob: " + df.format(history.getScore()));
                    LOG.trace("Token: " + token.getText());

                    StringBuilder sb = new StringBuilder();
                    for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) {
                        if (oneToken.equals(token))
                            sb.append("[" + oneToken + "]");
                        else
                            sb.append(oneToken);
                    }
                    LOG.trace(sb.toString());
                }

                PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history);
                List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>();

                // test the positive rules on the current token
                boolean ruleApplied = false;
                if (posTaggerPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (PosTaggerRule rule : posTaggerPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.getCondition().getName());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet()
                                        .createDefaultDecision(rule.getTag());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting posTag to: " + rule.getTag().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current token
                    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("analyse features");
                    try {
                        for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) {
                            MONITOR.startTask(posTaggerFeature.getCollectionName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = posTaggerFeature.check(context, env);
                                if (featureResult != null)
                                    featureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(posTaggerFeature.getCollectionName());
                            }
                        }
                        if (LOG.isTraceEnabled()) {
                            for (FeatureResult<?> result : featureResults) {
                                LOG.trace(result.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("analyse features");
                    }

                    // evaluate the feature results using the maxent model
                    MONITOR.startTask("make decision");
                    decisions = this.decisionMaker.decide(featureResults);
                    MONITOR.endTask("make decision");

                    for (ClassificationObserver<PosTag> observer : this.observers) {
                        observer.onAnalyse(token, featureResults, decisions);
                    }

                    // apply the negative rules
                    Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>();
                    if (posTaggerNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (PosTaggerRule rule : posTaggerNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.getCondition().getName());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedPosTags.add(rule.getTag());
                                    if (LOG.isTraceEnabled()) {
                                        LOG.trace(
                                                "Rule applies. Eliminating posTag: " + rule.getTag().getCode());
                                    }
                                }
                            }

                            if (eliminatedPosTags.size() > 0) {
                                List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                                for (Decision<PosTag> decision : decisions) {
                                    if (!eliminatedPosTags.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }

                    // is this a known word in the lexicon?
                    MONITOR.startTask("apply constraints");
                    try {
                        if (LOG.isTraceEnabled()) {
                            String posTags = "";
                            for (PosTag onePosTag : token.getPossiblePosTags()) {
                                posTags += onePosTag.getCode() + ",";
                            }
                            LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags);
                        }

                        List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();

                        for (Decision<PosTag> decision : decisions) {
                            if (decision.getProbability() >= MIN_PROB_TO_STORE) {
                                decisionShortList.add(decision);
                            }
                        }
                        if (decisionShortList.size() > 0) {
                            decisions = decisionShortList;
                        }
                    } finally {
                        MONITOR.endTask("apply constraints");
                    }
                } // has a rule been applied?

                // add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt
                MONITOR.startTask("heap sort");
                for (Decision<PosTag> decision : decisions) {
                    if (LOG.isTraceEnabled())
                        LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability());

                    PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token,
                            decision);
                    PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history);
                    sequence.addPosTaggedToken(posTaggedToken);
                    if (decision.isStatistical())
                        sequence.addDecision(decision);

                    double heapIndex = token.getEndIndex();
                    // add another half for an empty token, to differentiate it from regular ones
                    if (token.getStartIndex() == token.getEndIndex())
                        heapIndex += 0.5;

                    // if it's the last token, make sure we end
                    if (token.getIndex() == sequence.getTokenSequence().size() - 1)
                        heapIndex = sentenceLength;

                    if (LOG.isTraceEnabled())
                        LOG.trace("Heap index: " + heapIndex);

                    PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex);
                    if (heap == null) {
                        heap = new PriorityQueue<PosTagSequence>();
                        heaps.put(heapIndex, heap);
                    }
                    heap.add(sequence);
                } // next outcome for this token
                MONITOR.endTask("heap sort");
            } // next history      
        } // next atomic index
          // return the best sequence on the heap
        List<PosTagSequence> sequences = new ArrayList<PosTagSequence>();
        int i = 0;
        while (!finalHeap.isEmpty()) {
            sequences.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }

        // apply post-processing filters
        LOG.debug("####Final postag sequences:");
        int j = 1;
        for (PosTagSequence sequence : sequences) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Sequence before filters: " + sequence);
            }
            for (PosTagSequenceFilter filter : this.postProcessingFilters)
                filter.apply(sequence);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence after filters: " + sequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tagSentence");
    }
}
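
The heap bookkeeping above amounts to a bucketed beam search: one priority queue per end position, the lowest position expanded first, and at most beamWidth histories taken from each heap. A compressed sketch under those assumptions (Hypothesis is a hypothetical stand-in for PosTagSequence; token extension is elided):

import java.util.Map;
import java.util.PriorityQueue;
import java.util.TreeMap;

class BucketedBeamSketch {
    static class Hypothesis implements Comparable<Hypothesis> {
        final double position; // end position of the partial analysis
        final double score;
        Hypothesis(double position, double score) { this.position = position; this.score = score; }
        @Override public int compareTo(Hypothesis o) { return Double.compare(o.score, score); } // best first
    }

    static Hypothesis search(double sentenceLength, int beamWidth) {
        TreeMap<Double, PriorityQueue<Hypothesis>> heaps = new TreeMap<Double, PriorityQueue<Hypothesis>>();
        PriorityQueue<Hypothesis> heap0 = new PriorityQueue<Hypothesis>();
        heap0.add(new Hypothesis(0.0, 1.0));
        heaps.put(0.0, heap0);
        while (!heaps.isEmpty()) {
            Map.Entry<Double, PriorityQueue<Hypothesis>> entry = heaps.pollFirstEntry();
            if (entry.getKey() == sentenceLength) {
                return entry.getValue().poll(); // best complete hypothesis
            }
            PriorityQueue<Hypothesis> heap = entry.getValue();
            int expansions = Math.min(beamWidth, heap.size());
            for (int i = 0; i < expansions; i++) {
                Hypothesis h = heap.poll();
                // Real code would extend h by one token per decision; here each
                // extension simply advances one position and decays the score.
                Hypothesis next = new Hypothesis(Math.min(h.position + 1, sentenceLength), h.score * 0.9);
                PriorityQueue<Hypothesis> target = heaps.get(next.position);
                if (target == null) {
                    target = new PriorityQueue<Hypothesis>();
                    heaps.put(next.position, target);
                }
                target.add(next);
            }
        }
        return null;
    }
}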

From source file:edu.utsa.sifter.som.MainSOM.java

void initTerms() throws IOException {
    final Terms terms = MultiFields.getTerms(Reader, "body");

    System.out.println("number of terms in index: " + terms.size());
    final PriorityQueue<TermPair> topTerms = new PriorityQueue<TermPair>(Conf.MAX_VECTOR_FEATURES,
            new TermPair.TermPairComparator());

    int num = 0;
    TermsEnum term = terms.iterator(null);
    while (term.next() != null) {
        final int count = term.docFreq();
        final double r = ((double) count) / Reader.numDocs();

        if (Conf.DOC_FREQ_THRESHOLD_LOW <= r && r <= Conf.DOC_FREQ_THRESHOLD_HIGH) {
            final String s = term.term().utf8ToString();
            if (s.length() >= Conf.MIN_SOM_TERM_LENGTH) {
                if (topTerms.size() < Conf.MAX_VECTOR_FEATURES) {
                    topTerms.add(new TermPair(s, count));
                } else if (topTerms.peek().DocCount < count) {
                    topTerms.remove();
                    topTerms.add(new TermPair(s, count));
                }
                ++num;
            }
        }
    }
    System.out.println(num + " terms within the doc frequency range");

    final int numFeatures = Math.min(topTerms.size(), Conf.MAX_VECTOR_FEATURES);
    TermIndices = new HashMap<String, Integer>((numFeatures * 4 + 1) / 3); // respect load factor
    Terms = new java.util.Vector<String>(numFeatures);
    Terms.setSize(numFeatures);
    System.out.println("the top " + numFeatures + " features will be used");
    for (int i = numFeatures - 1; i > -1; --i) { // reverse order, to put top terms first
        TermPair t = topTerms.poll(); // least remaining
        TermIndices.put(t.Term, i);
        Terms.set(i, t.Term);
        // System.out.println("Including term " + t.Term + " (" + t.DocCount + ")");
    }
}
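
This is the bounded top-K idiom with a natural-order (min) heap: add freely until the queue reaches capacity, then let a new element in only if it beats the head, which is removed to make room. A compact sketch with hypothetical names:

import java.util.PriorityQueue;

class TopTermsSketch {
    // Keep the k highest counts: the head is the smallest survivor, so a larger
    // incoming count evicts it. Natural Integer ordering gives the min heap.
    static PriorityQueue<Integer> topCounts(int[] counts, int k) {
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>(k);
        for (int count : counts) {
            if (queue.size() < k) {
                queue.add(count);
            } else if (queue.peek() < count) {
                queue.remove(); // drop the current smallest
                queue.add(count);
            }
        }
        return queue;
    }
}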

From source file:delfos.group.grs.consensus.ConsensusGRS.java

public File getConsensusOutputXMLwithDesiredConsensusDegree(File consensusInputXML, double consensusDegree) {
    File consensusOutputDirectory = (File) getParameterValue(CONSENSUS_OUTPUT_FILES_DIRECTORY);

    String consensusInputXMLFileNameNoExtension = consensusInputXML.getName().substring(0,
            consensusInputXML.getName().lastIndexOf("."));

    String consensusInputXMLInOutputDirectoryAbsolutePath = consensusOutputDirectory.getAbsolutePath()
            + File.separator + consensusInputXMLFileNameNoExtension;

    File consensusInputXMLInOutputDirectory = new File(consensusInputXMLInOutputDirectoryAbsolutePath);

    if (!consensusInputXML.exists()) {
        Global.showWarning("The input XML '" + consensusInputXMLInOutputDirectory
                + "' does not exists in the output directory");
        return null;
    }

    if (!consensusOutputDirectory.exists()) {
        Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' not exists");
        return null;
    }

    if (!consensusOutputDirectory.isDirectory()) {
        Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' is not a directory");
        return null;
    }

    List<File> childrenFiles = new ArrayList<>(Arrays.asList(consensusOutputDirectory.listFiles()));
    PriorityQueue<PriorityItem<File>> queue = new PriorityQueue<>(Collections.reverseOrder());

    for (File consensusOutputFile : childrenFiles) {
        final String outputFileNameNoExtension = consensusOutputFile.getName().substring(0,
                consensusOutputFile.getName().lastIndexOf("."));
        if (outputFileNameNoExtension.startsWith(consensusInputXMLFileNameNoExtension)
                && outputFileNameNoExtension.contains("Consenso")) {
            try {
                Global.showln(consensusOutputFile.getAbsolutePath());
                double thisFileConsensusDegree = ConsensusOfIndividualRecommendationsToXML
                        .readConsensusOutputXML(consensusOutputFile).consensusDegree;

                queue.add(new PriorityItem<>(consensusOutputFile, thisFileConsensusDegree));
            } catch (JDOMException | IOException ex) {
                Global.showWarning(ex);
            }
        }
    }

    if (queue.isEmpty()) {
        return null;
    }

    if (Global.isVerboseAnnoying()) {
        Global.showInfoMessage("Found " + queue.size() + " consensus files");
    }

    while (!queue.isEmpty()) {
        PriorityItem<File> priorityItem = queue.poll();

        double consensusDegreeThisFile = priorityItem.getPriority();

        if (consensusDegreeThisFile >= consensusDegree) {
            return priorityItem.getKey();
        }
    }

    throw new IllegalStateException(
            "Consensus degree not reached for '" + consensusInputXMLFileNameNoExtension + "'");
}
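
Note the constructor argument: Collections.reverseOrder() flips the natural ordering of the queued items, so poll() returns files from highest to lowest consensus degree. The same trick in isolation:

import java.util.Collections;
import java.util.PriorityQueue;

class ReverseOrderSketch {
    public static void main(String[] args) {
        // Collections.reverseOrder() flips the natural ordering, so the largest
        // value sits at the head and poll() walks from highest to lowest.
        PriorityQueue<Double> byDescendingDegree = new PriorityQueue<Double>(Collections.reverseOrder());
        byDescendingDegree.add(0.4);
        byDescendingDegree.add(0.9);
        byDescendingDegree.add(0.7);
        while (!byDescendingDegree.isEmpty()) {
            System.out.println(byDescendingDegree.poll()); // 0.9, 0.7, 0.4
        }
    }
}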

From source file:com.joliciel.jochre.lexicon.MostLikelyWordChooserImpl.java

public LetterSequence chooseMostLikelyWord(List<LetterSequence> heap, List<LetterSequence> holdoverHeap,
        int n) {
    LetterSequence bestSequence = null;

    List<LetterSequence> holdoverWithDash = new ArrayList<LetterSequence>(n);
    List<LetterSequence> holdoverWithoutDash = new ArrayList<LetterSequence>(n);

    int i = 0;
    for (LetterSequence holdoverSequence : holdoverHeap) {
        if (i >= n)
            break;
        if (holdoverSequence.toString().endsWith("-"))
            holdoverWithDash.add(holdoverSequence);
        else
            holdoverWithoutDash.add(holdoverSequence);
        i++;
    }

    PriorityQueue<LetterSequence> combinedHeap = new PriorityQueue<LetterSequence>();
    for (LetterSequence sequenceWithDash : holdoverWithDash) {
        // find the dash that needs to be skipped at the end of sequence 1
        for (int j = sequenceWithDash.size() - 1; j >= 0; j--) {
            Letter outcome = sequenceWithDash.get(j);
            if (outcome.getString().equals("-")) {
                sequenceWithDash.setDashToSkip(j);
                break;
            }
        }
        for (LetterSequence letterSequence : heap) {
            LetterSequence combinedSequence = this.getLetterGuesserService().getLetterSequence(sequenceWithDash,
                    letterSequence);
            combinedHeap.add(combinedSequence);
        }
    }

    List<LetterSequence> combinedSequences = new ArrayList<LetterSequence>();
    for (i = 0; i < n; i++) {
        if (combinedHeap.isEmpty())
            break;
        combinedSequences.add(combinedHeap.poll());
    }

    if (holdoverWithoutDash.size() == 0) {
        // all holdovers end with a dash
        // therefore we must combine the two sequences
        bestSequence = this.chooseMostLikelyWord(combinedSequences, n);

    } else {
        // some holdovers end with a dash, others don't
        // need to compare combined sequences with individual sequences
        LetterSequence bestCombinedSequence = this.chooseMostLikelyWord(combinedSequences, n);

        // Originally we only included sequences without dashes here
        // However, this falsifies the results towards those without a dash
        // especially in the case where sequence 1 or sequence 2 is also a common word (e.g. der in Yiddish)
        //         PriorityQueue<LetterSequence> holdoverHeapWithoutDash = new PriorityQueue<LetterSequence>(holdoverWithoutDash);
        //         LetterSequence bestHoldoverSequenceWithoutDash = this.chooseMostLikelyWord(holdoverHeapWithoutDash, n);
        // Changed it to the following:
        LetterSequence bestHoldoverSequence = this.chooseMostLikelyWord(holdoverHeap, n);
        LetterSequence bestNextRowSequence = this.chooseMostLikelyWord(heap, n);

        if (LOG.isDebugEnabled()) {
            LOG.debug("Best combined: " + bestCombinedSequence.toString() + ". Adjusted score: "
                    + bestCombinedSequence.getAdjustedScore());
            LOG.debug("Best seq1 separate: " + bestHoldoverSequence.toString() + ". Adjusted score: "
                    + bestHoldoverSequence.getAdjustedScore());
            LOG.debug("Best seq2 separate: " + bestNextRowSequence.toString() + ". Adjusted score: "
                    + bestNextRowSequence.getAdjustedScore());
        }

        // Now, to compare the best combined with the best separate scores, we need to get a geometric mean of the shapes
        // in the best separate ones, and adjust for the lowest frequency word
        LetterSequence separateSequence = this.letterGuesserService.getLetterSequence(bestHoldoverSequence,
                bestNextRowSequence);
        int minFrequency = bestHoldoverSequence.getFrequency() < bestNextRowSequence.getFrequency()
                ? bestHoldoverSequence.getFrequency()
                : bestNextRowSequence.getFrequency();
        double freqLog = this.getFrequencyAdjustment(minFrequency);
        double separateAdjustedScore = separateSequence.getScore() * freqLog + additiveSmoothing;
        separateSequence.setAdjustedScore(separateAdjustedScore);
        if (LOG.isDebugEnabled())
            LOG.debug("Best separate: " + separateSequence.toString() + ". Score: "
                    + separateSequence.getScore() + ". Freq: " + minFrequency + ". Adjusted: " + freqLog
                    + ". Adjusted score: " + separateSequence.getAdjustedScore());

        if (bestCombinedSequence.getAdjustedScore() > separateAdjustedScore) {
            if (LOG.isDebugEnabled())
                LOG.debug("Using combined sequence");
            bestSequence = bestCombinedSequence;
        } else {
            if (LOG.isDebugEnabled())
                LOG.debug("Using separate sequences");
            bestSequence = this.getLetterGuesserService().getLetterSequence(bestHoldoverSequence,
                    bestNextRowSequence);
        }
        if (LOG.isDebugEnabled())
            LOG.debug("Best with holdover: " + bestSequence.toString());
    }

    return bestSequence;
}

From source file:io.warp10.script.functions.OPTDTW.java

@Override
public Object apply(WarpScriptStack stack) throws WarpScriptException {
    Object o = stack.pop();

    if (!(o instanceof Number)) {
        throw new WarpScriptException(
                getName() + " expects a count of best restults to return on top of the stack.");
    }

    int count = ((Number) o).intValue();

    o = stack.pop();

    if (!(o instanceof List)) {
        throw new WarpScriptException(getName() + " expects a numeric list to use as query below the count.");
    }

    double[] query = new double[((List) o).size()];
    int i = 0;
    for (Object oo : (List) o) {
        query[i++] = ((Number) oo).doubleValue();
    }

    // Z-Normalize query
    double[] musigma = DoubleUtils.musigma(query, true);
    for (i = 0; i < query.length; i++) {
        query[i] = (query[i] - musigma[0]) / musigma[1];
    }

    o = stack.pop();

    if (!(o instanceof List)) {
        throw new WarpScriptException(getName()
                + " expects a numeric list as the sequence in which to find best matches below the 'query' list.");
    }

    double[] sequence = new double[((List) o).size()];
    i = 0;
    for (Object oo : (List) o) {
        sequence[i++] = ((Number) oo).doubleValue();
    }

    if (sequence.length <= query.length) {
        throw new WarpScriptException(
                getName() + " expects the query list to be shorter than the sequence list.");
    }

    double mindist = 0.0;

    PriorityQueue<Pair<Integer, Double>> distances = new PriorityQueue<Pair<Integer, Double>>(
            new Comparator<Pair<Integer, Double>>() {
                @Override
                public int compare(Pair<Integer, Double> o1, Pair<Integer, Double> o2) {
                    return o1.getValue().compareTo(o2.getValue());
                }
            });

    double[] subsequence = new double[query.length];

    for (i = 0; i <= sequence.length - query.length; i++) {
        System.arraycopy(sequence, i, subsequence, 0, query.length);
        // Z-Normalize the subsequence
        musigma = DoubleUtils.musigma(subsequence, true);
        for (int j = 0; j < subsequence.length; j++) {
            subsequence[j] = (subsequence[j] - musigma[0]) / musigma[1];
        }
        double dist = dtw.compute(query, 0, query.length, subsequence, 0, query.length, mindist);

        if (dist < 0) {
            continue;
        }

        distances.add(new Pair<Integer, Double>(i, dist));

        //
        // If the priority queue is of 'count' size, retrieve the largest distance and
        // use it as the threshold for the DTW computation
        //

        if (count > 0 && distances.size() >= count) {
            // PriorityQueue.toArray() returns the heap in no particular order, so
            // sort a copy by distance before reading the count-th smallest value.
            Object[] adist = distances.toArray();
            Arrays.sort(adist, (a, b) -> ((Pair<Integer, Double>) a).getValue()
                    .compareTo(((Pair<Integer, Double>) b).getValue()));
            mindist = ((Pair<Integer, Double>) adist[count - 1]).getValue();
        }
    }

    List<List<Object>> results = new ArrayList<List<Object>>();

    while (!distances.isEmpty()) {

        Pair<Integer, Double> entry = distances.poll();

        List<Object> result = new ArrayList<Object>();
        result.add(entry.getKey());
        result.add(entry.getValue());
        results.add(result);

        if (count > 0 && count == results.size()) {
            break;
        }
    }

    stack.push(results);

    return stack;
}

From source file:com.addthis.hydra.data.io.DiskBackedList2.java

/**
 * Sort the collection of elements using a standard external sort algorithm: sort each chunk of elements, then
 * merge the chunks into a new list, then switch to the new list.
 */
public void sort(final Comparator<? super K> comp) {
    try {
        // Sort each chunk. Done if there is only one chunk.
        sortEachChunk(comp);
        if (chunks.size() <= 1) {
            return;
        }
        Comparator<Pair<K, Integer>> pairComp = new Comparator<Pair<K, Integer>>() {
            @Override
            public int compare(Pair<K, Integer> e1, Pair<K, Integer> e2) {
                return comp.compare(e1.getLeft(), e2.getLeft());
            }
        };
        // This heap stores the lowest remaining value from each chunk
        PriorityQueue<Pair<K, Integer>> heap = new PriorityQueue<>(chunks.size(), pairComp);
        ArrayList<Iterator<K>> iterators = new ArrayList<>(chunks.size());

        // Initialize the heap with one value per chunk
        close();
        for (int i = 0; i < chunks.size(); i++) {
            Iterator<K> it = chunks.get(i).getChunkIterator();
            iterators.add(i, it);
            if (it.hasNext()) {
                K elt = it.next();
                if (elt != null) {
                    heap.add(Pair.of(elt, i));
                }
            }
        }
        // Make a new disk backed list to store sorted values.
        // When the number of chunks is large, the size of the output buffer needs to shrink to make up for the extra mem usage
        long storageMaxChunkSize = maxChunkSizeBytes / (1 + chunks.size() / 20);
        DiskBackedList2<K> storage = new DiskBackedList2<>(codec, storageMaxChunkSize, directory);

        // Repeatedly pull the smallest element from the heap
        while (!heap.isEmpty()) {
            Pair<K, Integer> leastElt = heap.poll();
            storage.add(leastElt.getLeft());
            Iterator<K> polledIterator = iterators.get(leastElt.getRight());
            if (polledIterator.hasNext()) {
                heap.add(Pair.of(polledIterator.next(), leastElt.getRight()));
            }
        }

        // Switch to the storage dbl's chunks
        storage.close();
        chunks = storage.getChunks();
        currentChunk = null;
    } catch (IOException io) {
        throw Throwables.propagate(io);
    }
}
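
The merge loop above is a textbook k-way merge: the heap holds at most one pending element per chunk, so its head is always the globally smallest remaining value. A self-contained sketch of the same technique over sorted integer lists (hypothetical names):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

class KWayMergeSketch {
    // Merge already-sorted lists: the heap pairs each pending value with the
    // index of the list it came from, ordered by value.
    static List<Integer> merge(List<List<Integer>> sortedChunks) {
        PriorityQueue<int[]> heap = new PriorityQueue<int[]>(Math.max(1, sortedChunks.size()),
                (a, b) -> Integer.compare(a[0], b[0]));
        List<Iterator<Integer>> iterators = new ArrayList<Iterator<Integer>>();
        for (int i = 0; i < sortedChunks.size(); i++) {
            Iterator<Integer> it = sortedChunks.get(i).iterator();
            iterators.add(it);
            if (it.hasNext()) {
                heap.add(new int[] { it.next(), i }); // seed one value per chunk
            }
        }
        List<Integer> merged = new ArrayList<Integer>();
        while (!heap.isEmpty()) {
            int[] least = heap.poll();
            merged.add(least[0]);
            Iterator<Integer> source = iterators.get(least[1]);
            if (source.hasNext()) {
                heap.add(new int[] { source.next(), least[1] }); // refill from the same chunk
            }
        }
        return merged;
    }
}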