Example usage for java.util PriorityQueue size

List of usage examples for java.util PriorityQueue size

Introduction

On this page you can find example usages of java.util PriorityQueue size().

Prototype

public int size()

Document

Returns the number of elements in the priority queue.
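
A minimal, self-contained sketch of size() in action (values are hypothetical): the count starts at zero, grows with each add(), and shrinks with each poll().

import java.util.PriorityQueue;

public class PriorityQueueSizeDemo {
    public static void main(String[] args) {
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>();
        System.out.println(queue.size()); // 0 - the queue starts empty

        queue.add(42);
        queue.add(7);
        queue.add(19);
        System.out.println(queue.size()); // 3 - one element per add()

        queue.poll(); // removes the head (7, the smallest element)
        System.out.println(queue.size()); // 2 - poll() shrinks the queue
    }
}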

Usage

From source file: com.joliciel.talismane.posTagger.PosTaggerImpl.java

@Override
public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) {
    MONITOR.startTask("tagSentence");
    try {
        MONITOR.startTask("apply filters");
        try {
            for (TokenSequence tokenSequence : tokenSequences) {
                for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) {
                    tokenFilter.apply(tokenSequence);
                }
            }
        } finally {
            MONITOR.endTask("apply filters");
        }
        int sentenceLength = tokenSequences.get(0).getText().length();

        TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>();

        PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>();
        for (TokenSequence tokenSequence : tokenSequences) {
            // add an empty PosTagSequence for each token sequence
            PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0);
            emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(emptySequence);
        }
        heaps.put(0.0, heap0);

        PriorityQueue<PosTagSequence> finalHeap = null;
        while (heaps.size() > 0) {
            Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry();
            if (LOG.isTraceEnabled()) {
                LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength);
            }
            if (heapEntry.getKey() == sentenceLength) {
                finalHeap = heapEntry.getValue();
                break;
            }
            PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue();

            // limit the breadth to K
            int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();

            for (int j = 0; j < maxSequences; j++) {
                PosTagSequence history = previousHeap.poll();
                Token token = history.getNextToken();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString());
                    LOG.trace("Prob: " + df.format(history.getScore()));
                    LOG.trace("Token: " + token.getText());

                    StringBuilder sb = new StringBuilder();
                    for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) {
                        if (oneToken.equals(token))
                            sb.append("[" + oneToken + "]");
                        else
                            sb.append(oneToken);
                    }
                    LOG.trace(sb.toString());
                }

                PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history);
                List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>();

                // test the positive rules on the current token
                boolean ruleApplied = false;
                if (posTaggerPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (PosTaggerRule rule : posTaggerPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.getCondition().getName());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet()
                                        .createDefaultDecision(rule.getTag());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting posTag to: " + rule.getTag().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current token
                    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("analyse features");
                    try {
                        for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) {
                            MONITOR.startTask(posTaggerFeature.getCollectionName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = posTaggerFeature.check(context, env);
                                if (featureResult != null)
                                    featureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(posTaggerFeature.getCollectionName());
                            }
                        }
                        if (LOG.isTraceEnabled()) {
                            for (FeatureResult<?> result : featureResults) {
                                LOG.trace(result.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("analyse features");
                    }

                    // evaluate the feature results using the maxent model
                    MONITOR.startTask("make decision");
                    decisions = this.decisionMaker.decide(featureResults);
                    MONITOR.endTask("make decision");

                    for (ClassificationObserver<PosTag> observer : this.observers) {
                        observer.onAnalyse(token, featureResults, decisions);
                    }

                    // apply the negative rules
                    Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>();
                    if (posTaggerNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (PosTaggerRule rule : posTaggerNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.getCondition().getName());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedPosTags.add(rule.getTag());
                                    if (LOG.isTraceEnabled()) {
                                        LOG.trace(
                                                "Rule applies. Eliminating posTag: " + rule.getTag().getCode());
                                    }
                                }
                            }

                            if (eliminatedPosTags.size() > 0) {
                                List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                                for (Decision<PosTag> decision : decisions) {
                                    if (!eliminatedPosTags.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }

                    // is this a known word in the lexicon?
                    MONITOR.startTask("apply constraints");
                    try {
                        if (LOG.isTraceEnabled()) {
                            String posTags = "";
                            for (PosTag onePosTag : token.getPossiblePosTags()) {
                                posTags += onePosTag.getCode() + ",";
                            }
                            LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags);
                        }

                        List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();

                        for (Decision<PosTag> decision : decisions) {
                            if (decision.getProbability() >= MIN_PROB_TO_STORE) {
                                decisionShortList.add(decision);
                            }
                        }
                        if (decisionShortList.size() > 0) {
                            decisions = decisionShortList;
                        }
                    } finally {
                        MONITOR.endTask("apply constraints");
                    }
                } // has a rule been applied?

                // add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt
                MONITOR.startTask("heap sort");
                for (Decision<PosTag> decision : decisions) {
                    if (LOG.isTraceEnabled())
                        LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability());

                    PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token,
                            decision);
                    PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history);
                    sequence.addPosTaggedToken(posTaggedToken);
                    if (decision.isStatistical())
                        sequence.addDecision(decision);

                    double heapIndex = token.getEndIndex();
                    // add another half for an empty token, to differentiate it from regular ones
                    if (token.getStartIndex() == token.getEndIndex())
                        heapIndex += 0.5;

                    // if it's the last token, make sure we end
                    if (token.getIndex() == sequence.getTokenSequence().size() - 1)
                        heapIndex = sentenceLength;

                    if (LOG.isTraceEnabled())
                        LOG.trace("Heap index: " + heapIndex);

                    PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex);
                    if (heap == null) {
                        heap = new PriorityQueue<PosTagSequence>();
                        heaps.put(heapIndex, heap);
                    }
                    heap.add(sequence);
                } // next outcome for this token
                MONITOR.endTask("heap sort");
            } // next history      
        } // next atomic index
          // return the best sequence on the heap
        List<PosTagSequence> sequences = new ArrayList<PosTagSequence>();
        int i = 0;
        while (!finalHeap.isEmpty()) {
            sequences.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }

        // apply post-processing filters
        LOG.debug("####Final postag sequences:");
        int j = 1;
        for (PosTagSequence sequence : sequences) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Sequence before filters: " + sequence);
            }
            for (PosTagSequenceFilter filter : this.postProcessingFilters)
                filter.apply(sequence);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence after filters: " + sequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tagSentence");
    }
}
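
The tagger above uses size() to cap the beam: maxSequences takes the smaller of the heap size and the beam width, so at most beamWidth sequences are polled per step. A stripped-down sketch of that idiom, assuming natural ordering and hypothetical contents:

import java.util.PriorityQueue;

public class BeamLimitDemo {
    public static void main(String[] args) {
        PriorityQueue<String> previousHeap = new PriorityQueue<String>();
        previousHeap.add("cat");
        previousHeap.add("ant");
        previousHeap.add("bee");
        previousHeap.add("dog");

        int beamWidth = 2;
        // never poll more elements than the heap holds
        int maxSequences = previousHeap.size() > beamWidth ? beamWidth : previousHeap.size();
        for (int j = 0; j < maxSequences; j++) {
            System.out.println(previousHeap.poll()); // "ant", then "bee"
        }
    }
}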

From source file: com.joliciel.talismane.tokeniser.patterns.IntervalPatternTokeniser.java

@Override
public List<TokenisedAtomicTokenSequence> tokeniseWithDecisions(Sentence sentence) {
    MONITOR.startTask("tokeniseWithDecisions");
    try {
        // apply any pre-tokenisation decisions via filters
        // we only want one placeholder per start index - the first one that gets added
        Map<Integer, TokenPlaceholder> placeholderMap = new HashMap<Integer, TokenPlaceholder>();
        for (TokenFilter tokenFilter : this.tokenFilters) {
            Set<TokenPlaceholder> myPlaceholders = tokenFilter.apply(sentence.getText());
            for (TokenPlaceholder placeholder : myPlaceholders) {
                if (!placeholderMap.containsKey(placeholder.getStartIndex())) {
                    placeholderMap.put(placeholder.getStartIndex(), placeholder);
                }
            }
            if (LOG.isTraceEnabled()) {
                if (myPlaceholders.size() > 0) {
                    LOG.trace("TokenFilter: " + tokenFilter);
                    LOG.trace("placeholders: " + myPlaceholders);
                }
            }
        }

        Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(placeholderMap.values());

        // Initially, separate the sentence into tokens using the separators provided
        TokenSequence tokenSequence = this.tokeniserService.getTokenSequence(sentence, Tokeniser.SEPARATORS,
                placeholders);

        // apply any pre-processing filters that have been added
        for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
            tokenSequenceFilter.apply(tokenSequence);
        }

        // Assign each separator its default value
        List<TokeniserOutcome> defaultOutcomes = this.tokeniserPatternManager.getDefaultOutcomes(tokenSequence);
        List<Decision<TokeniserOutcome>> defaultDecisions = new ArrayList<Decision<TokeniserOutcome>>(
                defaultOutcomes.size());
        for (TokeniserOutcome outcome : defaultOutcomes) {
            Decision<TokeniserOutcome> tokeniserDecision = this.tokeniserDecisionFactory
                    .createDefaultDecision(outcome);
            tokeniserDecision.addAuthority("_" + this.getClass().getSimpleName());
            tokeniserDecision.addAuthority("_" + "DefaultDecision");
            defaultDecisions.add(tokeniserDecision);
        }
        List<TokenisedAtomicTokenSequence> sequences = null;

        // For each test pattern, see if anything in the sentence matches it
        if (this.decisionMaker != null) {
            Set<Token> tokensToCheck = new HashSet<Token>();
            MONITOR.startTask("pattern matching");
            try {
                for (TokenPattern parsedPattern : this.getTokeniserPatternManager().getParsedTestPatterns()) {
                    Set<Token> tokensToCheckForThisPattern = new HashSet<Token>();
                    List<TokenPatternMatchSequence> matchesForThisPattern = parsedPattern.match(tokenSequence);
                    for (TokenPatternMatchSequence tokenPatternMatch : matchesForThisPattern) {
                        if (LOG.isTraceEnabled())
                            tokensToCheckForThisPattern.addAll(tokenPatternMatch.getTokensToCheck());
                        tokensToCheck.addAll(tokenPatternMatch.getTokensToCheck());
                    }
                    if (LOG.isTraceEnabled()) {
                        if (tokensToCheckForThisPattern.size() > 0) {
                            LOG.trace("Parsed pattern: " + parsedPattern);
                            LOG.trace("tokensToCheck: " + tokensToCheckForThisPattern);
                        }
                    }
                }
            } finally {
                MONITOR.endTask("pattern matching");
            }

            // we want to create the n most likely token sequences
            // the sequence has to correspond to a token pattern

            // initially create a heap with a single, empty sequence
            PriorityQueue<TokenisedAtomicTokenSequence> heap = new PriorityQueue<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence emptySequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            heap.add(emptySequence);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Token : \"" + token.getText() + "\"");
                }
                // build a new heap for this iteration
                PriorityQueue<TokenisedAtomicTokenSequence> previousHeap = heap;
                heap = new PriorityQueue<TokenisedAtomicTokenSequence>();

                // limit the heap breadth to K
                int maxSequences = previousHeap.size() > this.getBeamWidth() ? this.getBeamWidth()
                        : previousHeap.size();
                for (int j = 0; j < maxSequences; j++) {
                    TokenisedAtomicTokenSequence history = previousHeap.poll();

                    // Find the separating & non-separating decisions
                    List<Decision<TokeniserOutcome>> decisions = null;
                    if (tokensToCheck.contains(token)) {
                        // test the features on the current token
                        TokeniserContext context = new TokeniserContext(token, history);
                        List<FeatureResult<?>> tokenFeatureResults = new ArrayList<FeatureResult<?>>();
                        MONITOR.startTask("analyse features");
                        try {
                            for (TokeniserContextFeature<?> feature : tokeniserContextFeatures) {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(context, env);
                                if (featureResult != null) {
                                    tokenFeatureResults.add(featureResult);
                                }
                            }

                            if (LOG.isTraceEnabled()) {
                                for (FeatureResult<?> featureResult : tokenFeatureResults) {
                                    LOG.trace(featureResult.toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("analyse features");
                        }

                        MONITOR.startTask("make decision");
                        try {
                            decisions = this.decisionMaker.decide(tokenFeatureResults);

                            for (ClassificationObserver<TokeniserOutcome> observer : this.observers)
                                observer.onAnalyse(token, tokenFeatureResults, decisions);

                            for (Decision<TokeniserOutcome> decision : decisions) {
                                decision.addAuthority(this.getClass().getSimpleName());
                                for (TokenPatternMatch tokenMatch : token.getMatches()) {
                                    decision.addAuthority(tokenMatch.getPattern().toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("make decision");
                        }
                    } else {
                        decisions = new ArrayList<Decision<TokeniserOutcome>>();
                        decisions.add(defaultDecisions.get(i));
                    }

                    MONITOR.startTask("heap sort");
                    try {
                        for (Decision<TokeniserOutcome> decision : decisions) {
                            TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService
                                    .getTaggedToken(token, decision);

                            TokenisedAtomicTokenSequence tokenisedSequence = this.getTokeniserService()
                                    .getTokenisedAtomicTokenSequence(history);
                            tokenisedSequence.add(taggedToken);
                            if (decision.isStatistical())
                                tokenisedSequence.addDecision(decision);
                            heap.add(tokenisedSequence);
                        }
                    } finally {
                        MONITOR.endTask("heap sort");
                    }

                } // next sequence in the old heap
                i++;
            } // next token

            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            i = 0;
            while (!heap.isEmpty()) {
                sequences.add(heap.poll());
                i++;
                if (i >= this.getBeamWidth())
                    break;
            }
        } else {
            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                        defaultDecisions.get(i++));
                defaultSequence.add(taggedToken);
            }
            sequences.add(defaultSequence);
        } // have decision maker?

        LOG.debug("####Final token sequences:");
        int j = 1;
        for (TokenisedAtomicTokenSequence sequence : sequences) {
            TokenSequence newTokenSequence = sequence.inferTokenSequence();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Token sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Atomic sequence: " + sequence);
                LOG.debug("Resulting sequence: " + newTokenSequence);
            }
            // need to re-apply the pre-processing filters, because the tokens are all new
            // Question: why can't we conserve the initial tokens when they haven't changed at all?
            // Answer: because the tokenSequence and index in the sequence is referenced by the token.
            // Question: should we create a separate class, Token and TokenInSequence,
            // one with index & sequence access & one without?
            for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
                tokenSequenceFilter.apply(newTokenSequence);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("After filters: " + newTokenSequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tokeniseWithDecisions");
    }
}

From source file: eu.stratosphere.pact.test.util.TestBase.java

/**
 * Compares the expected result string and the file(s) in HDFS line by line.
 * Both results (expected and computed) are held in memory. Hence, this
 * method should not be used to compare large results.
 *
 * The line comparator is used to compare lines from the expected and result set.
 * 
 * @param expectedResultStr
 * @param resultPath
 * @param comp Line comparator
 */
protected void compareResultsByLinesInMemory(String expectedResultStr, String resultPath,
        Comparator<String> comp) throws Exception {

    ArrayList<String> resultFiles = new ArrayList<String>();

    // Determine all result files
    if (getFilesystemProvider().isDir(resultPath)) {
        for (String file : getFilesystemProvider().listFiles(resultPath)) {
            if (!getFilesystemProvider().isDir(file)) {
                resultFiles.add(resultPath + "/" + file);
            }
        }
    } else {
        resultFiles.add(resultPath);
    }

    // collect lines of all result files
    PriorityQueue<String> computedResult = new PriorityQueue<String>();
    for (String resultFile : resultFiles) {
        // read each result file
        InputStream is = getFilesystemProvider().getInputStream(resultFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();

        // collect lines
        while (line != null) {
            computedResult.add(line);
            line = reader.readLine();
        }
        reader.close();
    }

    PriorityQueue<String> expectedResult = new PriorityQueue<String>();
    StringTokenizer st = new StringTokenizer(expectedResultStr, "\n");
    while (st.hasMoreElements()) {
        expectedResult.add(st.nextToken());
    }

    // log expected and computed results
    if (LOG.isDebugEnabled()) {
        LOG.debug("Expected: " + expectedResult);
        LOG.debug("Computed: " + computedResult);
    }

    Assert.assertEquals("Computed and expected results have different size", expectedResult.size(),
            computedResult.size());

    while (!expectedResult.isEmpty()) {
        String expectedLine = expectedResult.poll();
        String computedLine = computedResult.poll();

        if (LOG.isDebugEnabled())
            LOG.debug("expLine: <" + expectedLine + ">\t\t: compLine: <" + computedLine + ">");

        Assert.assertEquals("Computed and expected lines differ", expectedLine, computedLine);
    }
}
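
The test above leans on two properties of PriorityQueue: size() confirms both result sets hold the same number of lines, and repeated poll() yields lines in sorted order, which makes the comparison insensitive to file ordering. A compact sketch of the same idea with hypothetical data:

import java.util.PriorityQueue;

public class SortedComparisonDemo {
    public static void main(String[] args) {
        PriorityQueue<String> expected = new PriorityQueue<String>();
        PriorityQueue<String> computed = new PriorityQueue<String>();
        expected.add("beta");
        expected.add("alpha");
        computed.add("alpha");
        computed.add("beta");

        // sizes must match before any line-by-line comparison
        if (expected.size() != computed.size()) {
            throw new AssertionError("Computed and expected results have different size");
        }
        // poll() returns lines in sorted order, so insertion order is irrelevant
        while (!expected.isEmpty()) {
            String expectedLine = expected.poll();
            String computedLine = computed.poll();
            if (!expectedLine.equals(computedLine)) {
                throw new AssertionError("lines differ: <" + expectedLine + "> vs <" + computedLine + ">");
            }
        }
        System.out.println("results match");
    }
}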

From source file: edu.snu.leader.hierarchy.simple.test.TestIndividual.java

/**
 * Initialize the individual.
 *
 * @param allIndividuals
 */
public void initialize(List<TestIndividual> allIndividuals) {
    // Basically, we just need to find our neighbors
    // Build a priority queue to sort things for us
    PriorityQueue<TestNeighbor> sortedNeighbors = new PriorityQueue<TestNeighbor>();

    // Iterate through all the individuals
    Iterator<TestIndividual> indIter = allIndividuals.iterator();
    while (indIter.hasNext()) {
        // Get the individual
        TestIndividual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        TestNeighbor neighbor = new TestNeighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }
}

From source file: edu.utsa.sifter.som.MainSOM.java

void initTerms() throws IOException {
    final Terms terms = MultiFields.getTerms(Reader, "body");

    System.out.println("number of terms in index: " + terms.size());
    final PriorityQueue<TermPair> topTerms = new PriorityQueue<TermPair>(Conf.MAX_VECTOR_FEATURES,
            new TermPair.TermPairComparator());

    int num = 0;
    TermsEnum term = terms.iterator(null);
    while (term.next() != null) {
        final int count = term.docFreq();
        final double r = ((double) count) / Reader.numDocs();

        if (Conf.DOC_FREQ_THRESHOLD_LOW <= r && r <= Conf.DOC_FREQ_THRESHOLD_HIGH) {
            final String s = term.term().utf8ToString();
            if (s.length() >= Conf.MIN_SOM_TERM_LENGTH) {
                if (topTerms.size() < Conf.MAX_VECTOR_FEATURES) {
                    topTerms.add(new TermPair(s, count));
                } else if (topTerms.peek().DocCount < count) {
                    topTerms.remove();
                    topTerms.add(new TermPair(s, count));
                }
                ++num;
            }
        }
    }
    System.out.println(num + " terms within doc frequency range");

    final int numFeatures = Math.min(topTerms.size(), Conf.MAX_VECTOR_FEATURES);
    TermIndices = new HashMap<String, Integer>((numFeatures * 4 + 1) / 3); // respect load factor
    Terms = new java.util.Vector<String>(numFeatures);
    Terms.setSize(numFeatures);
    System.out.println("the top " + numFeatures + " features will be used");
    for (int i = numFeatures - 1; i > -1; --i) { // reverse order, to put top terms first
        TermPair t = topTerms.poll(); // least remaining
        TermIndices.put(t.Term, i);
        Terms.set(i, t.Term);
        // System.out.println("Including term " + t.Term + " (" + t.DocCount + ")");
    }
}
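
initTerms() keeps only the Conf.MAX_VECTOR_FEATURES most frequent terms by checking size() against the capacity: under capacity every candidate is added, and at capacity the root of the min-heap (the smallest count kept so far) is evicted whenever a larger candidate arrives. A reduced sketch of this bounded top-K pattern, with a hypothetical k:

import java.util.PriorityQueue;

public class TopKDemo {
    public static void main(String[] args) {
        int k = 3;
        int[] docCounts = { 5, 1, 9, 4, 7, 2 };

        // min-heap: the root is the smallest count currently kept
        PriorityQueue<Integer> topK = new PriorityQueue<Integer>();
        for (int count : docCounts) {
            if (topK.size() < k) {
                topK.add(count);              // still under capacity
            } else if (topK.peek() < count) { // candidate beats the smallest kept
                topK.remove();                // evict the root
                topK.add(count);
            }
        }
        System.out.println(topK); // holds 5, 7, 9 (in heap order, not sorted)
    }
}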

From source file: org.mule.util.store.MonitoredObjectStoreWrapper.java

public void expire() {
    try {
        final long now = System.nanoTime();
        List<Serializable> keys = allKeys();
        int excess = (allKeys().size() - maxEntries);
        if (maxEntries > 0 && excess > 0) {
            PriorityQueue<StoredObject<T>> q = new PriorityQueue<StoredObject<T>>(excess,
                    new Comparator<StoredObject<T>>() {

                        @Override
                        public int compare(StoredObject<T> paramT1, StoredObject<T> paramT2) {
                            return paramT2.timestamp.compareTo(paramT1.timestamp);
                        }
                    });
            long youngest = Long.MAX_VALUE;
            for (Serializable key : keys) {
                StoredObject<T> obj = getStore().retrieve(key);
                //TODO extract the entryTTL>0 outside of loop
                if (entryTTL > 0 && TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                    remove(key);
                    excess--;
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                } else {
                    if (excess > 0 && (q.size() < excess || obj.timestamp < youngest)) {
                        q.offer(obj);
                        youngest = q.peek().timestamp;
                    }
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }

                }
            }
            for (int i = 0; i < excess; i++) {
                Serializable key = q.poll().key;
                remove(key);
            }
        } else {
            if (entryTTL > 0) {
                for (Serializable key : keys) {
                    StoredObject<T> obj = getStore().retrieve(key);
                    if (TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                        remove(key);
                    }
                }
            }
        }
    } catch (Exception e) {
        logger.warn("Running expirty on " + baseStore + " threw " + e + ":" + e.getMessage());
    }
}

From source file: edu.snu.leader.hierarchy.simple.Individual.java

/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
private void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<Individual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        Individual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}

From source file: org.apache.storm.daemon.logviewer.utils.DirectoryCleaner.java

/**
 * If the total size of files exceeds either the per-worker quota or the global quota,
 * Logviewer deletes oldest inactive log files in a worker directory or in all worker dirs.
 * We use the parameter forPerDir to switch between the two deletion modes.
 *
 * @param dirs the list of directories to be scanned for deletion
 * @param quota the per-dir quota or the total quota for the all directories
 * @param forPerDir if true, deletion happens for a single dir; otherwise, for all directories globally
 * @param activeDirs only for global deletion, we want to skip the active logs in activeDirs
 * @return number of files deleted
 */
public DeletionMeta deleteOldestWhileTooLarge(List<Path> dirs, long quota, boolean forPerDir,
        Set<Path> activeDirs) throws IOException {
    long totalSize = 0;
    for (Path dir : dirs) {
        try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
            for (Path path : stream) {
                totalSize += Files.size(path);
            }
        }
    }
    LOG.debug("totalSize: {} quota: {}", totalSize, quota);
    long toDeleteSize = totalSize - quota;
    if (toDeleteSize <= 0) {
        return DeletionMeta.EMPTY;
    }

    int deletedFiles = 0;
    long deletedSize = 0;
    // the oldest pq_size files in this directory will be placed in PQ, with the newest at the root
    PriorityQueue<Pair<Path, FileTime>> pq = new PriorityQueue<>(PQ_SIZE,
            Comparator.comparing((Pair<Path, FileTime> p) -> p.getRight()).reversed());
    int round = 0;
    final Set<Path> excluded = new HashSet<>();
    while (toDeleteSize > 0) {
        LOG.debug("To delete size is {}, start a new round of deletion, round: {}", toDeleteSize, round);
        for (Path dir : dirs) {
            try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
                for (Path path : stream) {
                    if (!excluded.contains(path)) {
                        if (isFileEligibleToSkipDelete(forPerDir, activeDirs, dir, path)) {
                            excluded.add(path);
                        } else {
                            Pair<Path, FileTime> p = Pair.of(path, Files.getLastModifiedTime(path));
                            if (pq.size() < PQ_SIZE) {
                                pq.offer(p);
                            } else if (p.getRight().toMillis() < pq.peek().getRight().toMillis()) {
                                pq.poll();
                                pq.offer(p);
                            }
                        }
                    }
                }
            }
        }
        if (!pq.isEmpty()) {
            // need to reverse the order of elements in PQ to delete files from oldest to newest
            Stack<Pair<Path, FileTime>> stack = new Stack<>();
            while (!pq.isEmpty()) {
                stack.push(pq.poll());
            }
            while (!stack.isEmpty() && toDeleteSize > 0) {
                Pair<Path, FileTime> pair = stack.pop();
                Path file = pair.getLeft();
                final String canonicalPath = file.toAbsolutePath().normalize().toString();
                final long fileSize = Files.size(file);
                final long lastModified = pair.getRight().toMillis();
                //Original implementation doesn't actually check if delete succeeded or not.
                try {
                    Utils.forceDelete(file.toString());
                    LOG.info("Delete file: {}, size: {}, lastModified: {}", canonicalPath, fileSize,
                            lastModified);
                    toDeleteSize -= fileSize;
                    deletedSize += fileSize;
                    deletedFiles++;
                } catch (IOException e) {
                    excluded.add(file);
                }
            }
            pq.clear();
            round++;
            if (round >= MAX_ROUNDS) {
                if (forPerDir) {
                    LOG.warn(
                            "Reach the MAX_ROUNDS: {} during per-dir deletion, you may have too many files in "
                                    + "a single directory : {}, will delete the rest files in next interval.",
                            MAX_ROUNDS, dirs.get(0).toAbsolutePath().normalize());
                } else {
                    LOG.warn("Reach the MAX_ROUNDS: {} during global deletion, you may have too many files, "
                            + "will delete the rest files in next interval.", MAX_ROUNDS);
                }
                break;
            }
        } else {
            LOG.warn("No more files able to delete this round, but {} is over quota by {} MB",
                    forPerDir ? "this directory" : "root directory", toDeleteSize * 1e-6);
        }
    }
    return new DeletionMeta(deletedSize, deletedFiles);
}
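
deleteOldestWhileTooLarge() inverts the usual top-K idiom: the comparator is reversed so the newest timestamp sits at the root, and the size() check against PQ_SIZE keeps only the PQ_SIZE oldest files as deletion candidates. A reduced sketch of that reversed variant, with hypothetical timestamps:

import java.util.Comparator;
import java.util.PriorityQueue;

public class OldestKDemo {
    public static void main(String[] args) {
        int pqSize = 3;
        long[] modifiedTimes = { 50L, 10L, 40L, 20L, 30L };

        // reversed comparator: the NEWEST timestamp is at the root, ready to be evicted
        PriorityQueue<Long> oldest = new PriorityQueue<Long>(pqSize, Comparator.<Long>reverseOrder());
        for (long t : modifiedTimes) {
            if (oldest.size() < pqSize) {
                oldest.offer(t);            // still under capacity
            } else if (t < oldest.peek()) { // older than the newest kept: replace it
                oldest.poll();
                oldest.offer(t);
            }
        }
        System.out.println(oldest); // holds the three oldest: 10, 20, 30
    }
}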

From source file: io.warp10.script.functions.OPTDTW.java

@Override
public Object apply(WarpScriptStack stack) throws WarpScriptException {
    Object o = stack.pop();

    if (!(o instanceof Number)) {
        throw new WarpScriptException(
                getName() + " expects a count of best restults to return on top of the stack.");
    }

    int count = ((Number) o).intValue();

    o = stack.pop();

    if (!(o instanceof List)) {
        throw new WarpScriptException(getName() + " expects a numeric list to use as query below the count.");
    }

    double[] query = new double[((List) o).size()];
    int i = 0;
    for (Object oo : (List) o) {
        query[i++] = ((Number) oo).doubleValue();
    }

    // Z-Normalize query
    double[] musigma = DoubleUtils.musigma(query, true);
    for (i = 0; i < query.length; i++) {
        query[i] = (query[i] - musigma[0]) / musigma[1];
    }

    o = stack.pop();

    if (!(o instanceof List)) {
        throw new WarpScriptException(getName()
                + " expects a numeric list as the sequence in which to find best matches below the 'query' list.");
    }

    double[] sequence = new double[((List) o).size()];
    i = 0;
    for (Object oo : (List) o) {
        sequence[i++] = ((Number) oo).doubleValue();
    }

    if (sequence.length <= query.length) {
        throw new WarpScriptException(
                getName() + " expects the query list to be shorter than the sequence list.");
    }

    double mindist = 0.0;

    PriorityQueue<Pair<Integer, Double>> distances = new PriorityQueue<Pair<Integer, Double>>(
            new Comparator<Pair<Integer, Double>>() {
                @Override
                public int compare(Pair<Integer, Double> o1, Pair<Integer, Double> o2) {
                    return o1.getValue().compareTo(o2.getValue());
                }
            });

    double[] subsequence = new double[query.length];

    for (i = 0; i <= sequence.length - query.length; i++) {
        System.arraycopy(sequence, i, subsequence, 0, query.length);
        // Z-Normalize the subsequence
        musigma = DoubleUtils.musigma(subsequence, true);
        for (int j = 0; j < subsequence.length; j++) {
            subsequence[j] = (subsequence[j] - musigma[0]) / musigma[1];
        }
        double dist = dtw.compute(query, 0, query.length, subsequence, 0, query.length, mindist);

        if (dist < 0) {
            continue;
        }

        distances.add(new Pair<Integer, Double>(i, dist));

        //
        // If the priority queue is of 'count' size, retrieve the largest distance and
        // use it as the threshold for the DTW computation
        //

        if (count > 0 && distances.size() >= count) {
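                // caveat: PriorityQueue.toArray() returns elements in no particular order,
                // so adist[count - 1] is not guaranteed to be the count-th smallest distance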
            Object adist[] = distances.toArray();
            mindist = ((Pair<Integer, Double>) adist[count - 1]).getValue();
        }
    }

    List<List<Object>> results = new ArrayList<List<Object>>();

    while (!distances.isEmpty()) {

        Pair<Integer, Double> entry = distances.poll();

        List<Object> result = new ArrayList<Object>();
        result.add(entry.getKey());
        result.add(entry.getValue());
        results.add(result);

        if (count > 0 && count == results.size()) {
            break;
        }
    }

    stack.push(results);

    return stack;
}

From source file: delfos.group.grs.consensus.ConsensusGRS.java

public File getConsensusOutputXMLwithDesiredConsensusDegree(File consensusInputXML, double consensusDegree) {
    File consensusOutputDirectory = (File) getParameterValue(CONSENSUS_OUTPUT_FILES_DIRECTORY);

    String consensusInputXMLFileNameNoExtension = consensusInputXML.getName().substring(0,
            consensusInputXML.getName().lastIndexOf("."));

    String consensusInputXMLInOutputDirectoryAbsolutePath = consensusOutputDirectory.getAbsolutePath()
            + File.separator + consensusInputXMLFileNameNoExtension;

    File consensusInputXMLInOutputDirectory = new File(consensusInputXMLInOutputDirectoryAbsolutePath);

    if (!consensusInputXML.exists()) {
        Global.showWarning("The input XML '" + consensusInputXMLInOutputDirectory
                + "' does not exists in the output directory");
        return null;
    }

    if (!consensusOutputDirectory.exists()) {
        Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' not exists");
        return null;
    }

    if (!consensusOutputDirectory.isDirectory()) {
        Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' is not a directory");
        return null;
    }

    List<File> childrenFiles = new ArrayList<>(Arrays.asList(consensusOutputDirectory.listFiles()));
    PriorityQueue<PriorityItem<File>> queue = new PriorityQueue<>(Collections.reverseOrder());

    for (File consensusOutputFile : childrenFiles) {
        final String outputFileNameNoExtension = consensusOutputFile.getName().substring(0,
                consensusOutputFile.getName().lastIndexOf("."));
        if (outputFileNameNoExtension.startsWith(consensusInputXMLFileNameNoExtension)
                && outputFileNameNoExtension.contains("Consenso")) {
            try {
                Global.showln(consensusOutputFile.getAbsolutePath());
                double thisFileConsensusDegree = ConsensusOfIndividualRecommendationsToXML
                        .readConsensusOutputXML(consensusOutputFile).consensusDegree;

                queue.add(new PriorityItem<>(consensusOutputFile, thisFileConsensusDegree));
            } catch (JDOMException | IOException ex) {
                Global.showWarning(ex);
            }
        }
    }

    if (queue.isEmpty()) {
        return null;
    }

    if (Global.isVerboseAnnoying()) {
        Global.showInfoMessage("Found " + queue.size() + " consensus files");
    }

    while (!queue.isEmpty()) {
        PriorityItem<File> priorityItem = queue.poll();

        double consensusDegreeThisFile = priorityItem.getPriority();

        if (consensusDegreeThisFile >= consensusDegree) {
            return priorityItem.getKey();
        }
    }

    throw new IllegalStateException(
            "Consensus degree not reached for '" + consensusInputXMLFileNameNoExtension + "'");
}