List of usage examples for java.util.PriorityQueue.poll()
public E poll()
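poll() retrieves and removes the head of the queue, or returns null if the queue is empty. Before the real-world examples below, here is a minimal, self-contained illustration (the values are arbitrary):

import java.util.PriorityQueue;

public class PollExample {
    public static void main(String[] args) {
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>();
        queue.add(42);
        queue.add(7);
        queue.add(19);

        // poll() removes and returns the head: the smallest element under natural ordering
        while (!queue.isEmpty()) {
            System.out.println(queue.poll()); // prints 7, then 19, then 42
        }

        // on an empty queue, poll() returns null instead of throwing
        System.out.println(queue.poll()); // prints null
    }
}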
From source file:com.joliciel.jochre.analyser.BeamSearchImageAnalyser.java
public void analyseInternal(JochreImage image) {
    LOG.debug("Analysing image " + image.getId());
    if (currentMonitor != null) {
        currentMonitor.setCurrentAction("imageMonitor.analysingImage",
                new Object[] { image.getPage().getIndex() });
    }
    for (LetterGuessObserver observer : observers) {
        observer.onImageStart(image);
    }
    if (totalShapeCount < 0)
        totalShapeCount = image.getShapeCount();
    for (Paragraph paragraph : image.getParagraphs()) {
        LOG.debug("Analysing paragraph " + paragraph.getIndex() + " (id=" + paragraph.getId() + ")");
        List<LetterSequence> holdoverSequences = null;
        for (RowOfShapes row : paragraph.getRows()) {
            LOG.debug("Analysing row " + row.getIndex() + " (id=" + row.getId() + ")");
            for (GroupOfShapes group : row.getGroups()) {
                if (group.isSkip()) {
                    LOG.debug("Skipping group " + group.getIndex() + " (id=" + group.getId() + ")");
                    continue;
                }
                LOG.debug("Analysing group " + group.getIndex() + " (id=" + group.getId() + ")");
                int width = group.getRight() - group.getLeft() + 1;
                List<ShapeSequence> shapeSequences = null;
                if (boundaryDetector != null) {
                    shapeSequences = boundaryDetector.findBoundaries(group);
                } else {
                    // simply add this group's shapes
                    shapeSequences = new ArrayList<ShapeSequence>();
                    ShapeSequence shapeSequence = boundaryService.getEmptyShapeSequence();
                    for (Shape shape : group.getShapes())
                        shapeSequence.addShape(shape);
                    shapeSequences.add(shapeSequence);
                }
                // Perform a beam search to guess the most likely sequence for this word
                TreeMap<Integer, PriorityQueue<LetterSequence>> heaps = new TreeMap<Integer, PriorityQueue<LetterSequence>>();
                // prime a starter heap with the n best shape boundary analyses for this group
                PriorityQueue<LetterSequence> starterHeap = new PriorityQueue<LetterSequence>(1);
                for (ShapeSequence shapeSequence : shapeSequences) {
                    LetterSequence emptySequence = this.getLetterGuesserService()
                            .getEmptyLetterSequence(shapeSequence);
                    starterHeap.add(emptySequence);
                }
                heaps.put(0, starterHeap);
                PriorityQueue<LetterSequence> finalHeap = null;
                while (heaps.size() > 0) {
                    Entry<Integer, PriorityQueue<LetterSequence>> heapEntry = heaps.pollFirstEntry();
                    if (LOG.isTraceEnabled())
                        LOG.trace("heap for index: " + heapEntry.getKey().intValue() + ", width: " + width);
                    if (heapEntry.getKey().intValue() == width) {
                        finalHeap = heapEntry.getValue();
                        break;
                    }
                    PriorityQueue<LetterSequence> previousHeap = heapEntry.getValue();
                    // limit the breadth to K
                    int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth
                            : previousHeap.size();
                    for (int j = 0; j < maxSequences; j++) {
                        LetterSequence history = previousHeap.poll();
                        ShapeInSequence shapeInSequence = history.getNextShape();
                        Shape shape = shapeInSequence.getShape();
                        if (LOG.isTraceEnabled()) {
                            LOG.trace("Sequence " + history + ", shape: " + shape);
                        }
                        LogUtils.logMemory(LOG);
                        int position = 0;
                        if (Linguistics.getInstance(image.getPage().getDocument().getLocale())
                                .isLeftToRight()) {
                            position = shape.getRight() - group.getLeft() + 1;
                        } else {
                            position = group.getRight() - shape.getLeft() + 1;
                        }
                        PriorityQueue<LetterSequence> heap = heaps.get(position);
                        if (heap == null) {
                            heap = new PriorityQueue<LetterSequence>();
                            heaps.put(position, heap);
                        }
                        MONITOR.startTask("guess letter");
                        try {
                            letterGuesser.guessLetter(shapeInSequence, history);
                        } finally {
                            MONITOR.endTask();
                        }
                        MONITOR.startTask("heap sort");
                        try {
                            for (Decision<Letter> letterGuess : shape.getLetterGuesses()) {
                                // leave out very low probability outcomes
                                if (letterGuess.getProbability() > this.minOutcomeWeight) {
                                    LetterSequence sequence = this.getLetterGuesserService()
                                            .getLetterSequencePlusOne(history);
                                    sequence.add(letterGuess.getOutcome());
                                    sequence.addDecision(letterGuess);
                                    heap.add(sequence);
                                } // weight big enough to include
                            } // next letter guess for this shape
                        } finally {
                            MONITOR.endTask();
                        }
                    } // next history in heap
                } // any more heaps?
                LetterSequence bestSequence = null;
                boolean shouldCombineWithHoldover = false;
                boolean isHoldover = false;
                MONITOR.startTask("best sequence");
                try {
                    List<LetterSequence> finalSequences = new ArrayList<LetterSequence>();
                    for (int i = 0; i < this.beamWidth; i++) {
                        if (finalHeap.isEmpty())
                            break;
                        finalSequences.add(finalHeap.poll());
                    }
                    if (this.getMostLikelyWordChooser() == null) {
                        // most likely sequence is on top of the last heap
                        bestSequence = finalSequences.get(0);
                    } else {
                        // get most likely sequence using lexicon
                        if (holdoverSequences != null) {
                            // we have a holdover from the previous row ending with a dash
                            bestSequence = this.getMostLikelyWordChooser().chooseMostLikelyWord(finalSequences,
                                    holdoverSequences, this.beamWidth);
                            shouldCombineWithHoldover = true;
                        } else {
                            // check if this is the last group on the row and could end with a dash
                            boolean shouldBeHeldOver = false;
                            if (group.getIndex() == row.getGroups().size() - 1
                                    && row.getIndex() < paragraph.getRows().size() - 1) {
                                for (LetterSequence letterSequence : finalSequences) {
                                    if (letterSequence.toString().endsWith("-")) {
                                        shouldBeHeldOver = true;
                                        break;
                                    }
                                }
                            }
                            if (shouldBeHeldOver) {
                                holdoverSequences = finalSequences;
                                isHoldover = true;
                            } else {
                                // simplest case: no holdover
                                bestSequence = this.getMostLikelyWordChooser()
                                        .chooseMostLikelyWord(finalSequences, this.beamWidth);
                            }
                        } // have we holdover sequences?
                    } // have we a most likely word chooser?
                    if (!isHoldover) {
                        for (LetterGuessObserver observer : observers) {
                            observer.onBeamSearchEnd(bestSequence, finalSequences, holdoverSequences);
                        }
                    }
                } finally {
                    MONITOR.endTask();
                }
                MONITOR.startTask("assign letter");
                try {
                    if (shouldCombineWithHoldover) {
                        holdoverSequences = null;
                    }
                    if (!isHoldover) {
                        for (LetterGuessObserver observer : observers) {
                            observer.onStartSequence(bestSequence);
                        }
                        group.setBestLetterSequence(bestSequence);
                        int i = 0;
                        for (ShapeInSequence shapeInSequence : bestSequence.getUnderlyingShapeSequence()) {
                            String bestOutcome = bestSequence.get(i).getString();
                            this.assignLetter(shapeInSequence, bestOutcome);
                            i++;
                        } // next shape
                        for (LetterGuessObserver observer : observers) {
                            observer.onGuessSequence(bestSequence);
                        }
                    }
                    this.shapeCount += group.getShapes().size();
                    if (this.currentMonitor != null) {
                        double progress = (double) shapeCount / (double) totalShapeCount;
                        LOG.debug("progress: " + progress);
                        currentMonitor.setPercentComplete(progress);
                    }
                } finally {
                    MONITOR.endTask();
                }
            } // next group
        } // next row
    } // next paragraph
    for (LetterGuessObserver observer : observers) {
        observer.onImageEnd();
    }
}
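The example above uses a heap-of-heaps beam search: partial letter sequences live in a TreeMap of PriorityQueues keyed by pixel position, and each round polls at most beamWidth hypotheses before pushing their extensions into later heaps. A stripped-down, self-contained sketch of the same idiom follows; Hypothesis, the extend-by-character step, and BEAM_WIDTH are hypothetical stand-ins, not the Jochre API:

import java.util.Map.Entry;
import java.util.PriorityQueue;
import java.util.TreeMap;

public class BeamSearchSketch {

    // Hypothetical partial hypothesis; ordered so the heap's head has the best score
    static class Hypothesis implements Comparable<Hypothesis> {
        final String text;
        final double score;
        Hypothesis(String text, double score) { this.text = text; this.score = score; }
        public int compareTo(Hypothesis other) { return Double.compare(other.score, this.score); }
    }

    static final int BEAM_WIDTH = 3;

    static Hypothesis search(int targetLength) {
        TreeMap<Integer, PriorityQueue<Hypothesis>> heaps = new TreeMap<Integer, PriorityQueue<Hypothesis>>();
        PriorityQueue<Hypothesis> starter = new PriorityQueue<Hypothesis>();
        starter.add(new Hypothesis("", 0.0));
        heaps.put(0, starter);

        while (heaps.size() > 0) {
            Entry<Integer, PriorityQueue<Hypothesis>> entry = heaps.pollFirstEntry();
            if (entry.getKey() == targetLength) {
                return entry.getValue().poll(); // best complete hypothesis is on top
            }
            PriorityQueue<Hypothesis> previousHeap = entry.getValue();
            // poll() only the K best partial hypotheses; the rest are pruned
            int maxSequences = Math.min(previousHeap.size(), BEAM_WIDTH);
            for (int j = 0; j < maxSequences; j++) {
                Hypothesis history = previousHeap.poll();
                // extend by one step (stand-in for letterGuesser.guessLetter)
                for (char c = 'a'; c <= 'c'; c++) {
                    Hypothesis extended = new Hypothesis(history.text + c, history.score + Math.random());
                    PriorityQueue<Hypothesis> heap = heaps.get(entry.getKey() + 1);
                    if (heap == null) {
                        heap = new PriorityQueue<Hypothesis>();
                        heaps.put(entry.getKey() + 1, heap);
                    }
                    heap.add(extended);
                }
            }
        }
        return null;
    }

    public static void main(String[] args) {
        System.out.println(search(5).text);
    }
}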
From source file:com.linkedin.pinot.query.plan.PlanMakerTest.java
@Test
public void testInnerSegmentPlanMakerForSelectionNoFilter() {
    BrokerRequest brokerRequest = getSelectionNoFilterBrokerRequest();
    PlanMaker instancePlanMaker = new InstancePlanMakerImplV2();
    PlanNode rootPlanNode = instancePlanMaker.makeInnerSegmentPlan(_indexSegment, brokerRequest);
    rootPlanNode.showTree("");
    IntermediateResultsBlock resultBlock = (IntermediateResultsBlock) rootPlanNode.run().nextBlock();
    PriorityQueue<Serializable[]> retPriorityQueue = (PriorityQueue<Serializable[]>) resultBlock
            .getSelectionResult();
    while (!retPriorityQueue.isEmpty()) {
        Serializable[] row = retPriorityQueue.poll();
        LOGGER.debug(Arrays.toString(row));
        assertEquals(row[0], 9);
        assertEquals(row[1], 99);
    }
}
From source file:com.joliciel.talismane.posTagger.PosTaggerImpl.java
@Override
public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) {
    MONITOR.startTask("tagSentence");
    try {
        MONITOR.startTask("apply filters");
        try {
            for (TokenSequence tokenSequence : tokenSequences) {
                for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) {
                    tokenFilter.apply(tokenSequence);
                }
            }
        } finally {
            MONITOR.endTask("apply filters");
        }
        int sentenceLength = tokenSequences.get(0).getText().length();
        TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>();
        PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>();
        for (TokenSequence tokenSequence : tokenSequences) {
            // add an empty PosTagSequence for each token sequence
            PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0);
            emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(emptySequence);
        }
        heaps.put(0.0, heap0);
        PriorityQueue<PosTagSequence> finalHeap = null;
        while (heaps.size() > 0) {
            Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry();
            if (LOG.isTraceEnabled()) {
                LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength);
            }
            if (heapEntry.getKey() == sentenceLength) {
                finalHeap = heapEntry.getValue();
                break;
            }
            PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue();
            // limit the breadth to K
            int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();
            for (int j = 0; j < maxSequences; j++) {
                PosTagSequence history = previousHeap.poll();
                Token token = history.getNextToken();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString());
                    LOG.trace("Prob: " + df.format(history.getScore()));
                    LOG.trace("Token: " + token.getText());
                    StringBuilder sb = new StringBuilder();
                    for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) {
                        if (oneToken.equals(token))
                            sb.append("[" + oneToken + "]");
                        else
                            sb.append(oneToken);
                    }
                    LOG.trace(sb.toString());
                }
                PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history);
                List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>();
                // test the positive rules on the current token
                boolean ruleApplied = false;
                if (posTaggerPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (PosTaggerRule rule : posTaggerPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.getCondition().getName());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet()
                                        .createDefaultDecision(rule.getTag());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting posTag to: " + rule.getTag().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }
                if (!ruleApplied) {
                    // test the features on the current token
                    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("analyse features");
                    try {
                        for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) {
                            MONITOR.startTask(posTaggerFeature.getCollectionName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = posTaggerFeature.check(context, env);
                                if (featureResult != null)
                                    featureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(posTaggerFeature.getCollectionName());
                            }
                        }
                        if (LOG.isTraceEnabled()) {
                            for (FeatureResult<?> result : featureResults) {
                                LOG.trace(result.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("analyse features");
                    }
                    // evaluate the feature results using the maxent model
                    MONITOR.startTask("make decision");
                    decisions = this.decisionMaker.decide(featureResults);
                    MONITOR.endTask("make decision");
                    for (ClassificationObserver<PosTag> observer : this.observers) {
                        observer.onAnalyse(token, featureResults, decisions);
                    }
                    // apply the negative rules
                    Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>();
                    if (posTaggerNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (PosTaggerRule rule : posTaggerNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.getCondition().getName());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedPosTags.add(rule.getTag());
                                    if (LOG.isTraceEnabled()) {
                                        LOG.trace("Rule applies. Eliminating posTag: "
                                                + rule.getTag().getCode());
                                    }
                                }
                            }
                            if (eliminatedPosTags.size() > 0) {
                                List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                                for (Decision<PosTag> decision : decisions) {
                                    if (!eliminatedPosTags.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }
                    // is this a known word in the lexicon?
                    MONITOR.startTask("apply constraints");
                    try {
                        if (LOG.isTraceEnabled()) {
                            String posTags = "";
                            for (PosTag onePosTag : token.getPossiblePosTags()) {
                                posTags += onePosTag.getCode() + ",";
                            }
                            LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags);
                        }
                        List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                        for (Decision<PosTag> decision : decisions) {
                            if (decision.getProbability() >= MIN_PROB_TO_STORE) {
                                decisionShortList.add(decision);
                            }
                        }
                        if (decisionShortList.size() > 0) {
                            decisions = decisionShortList;
                        }
                    } finally {
                        MONITOR.endTask("apply constraints");
                    }
                } // has a rule been applied?
                // add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt
                MONITOR.startTask("heap sort");
                for (Decision<PosTag> decision : decisions) {
                    if (LOG.isTraceEnabled())
                        LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability());
                    PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token,
                            decision);
                    PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history);
                    sequence.addPosTaggedToken(posTaggedToken);
                    if (decision.isStatistical())
                        sequence.addDecision(decision);
                    double heapIndex = token.getEndIndex();
                    // add another half for an empty token, to differentiate it from regular ones
                    if (token.getStartIndex() == token.getEndIndex())
                        heapIndex += 0.5;
                    // if it's the last token, make sure we end
                    if (token.getIndex() == sequence.getTokenSequence().size() - 1)
                        heapIndex = sentenceLength;
                    if (LOG.isTraceEnabled())
                        LOG.trace("Heap index: " + heapIndex);
                    PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex);
                    if (heap == null) {
                        heap = new PriorityQueue<PosTagSequence>();
                        heaps.put(heapIndex, heap);
                    }
                    heap.add(sequence);
                } // next outcome for this token
                MONITOR.endTask("heap sort");
            } // next history
        } // next atomic index
        // return the best sequence on the heap
        List<PosTagSequence> sequences = new ArrayList<PosTagSequence>();
        int i = 0;
        while (!finalHeap.isEmpty()) {
            sequences.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }
        // apply post-processing filters
        LOG.debug("####Final postag sequences:");
        int j = 1;
        for (PosTagSequence sequence : sequences) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Sequence before filters: " + sequence);
            }
            for (PosTagSequenceFilter filter : this.postProcessingFilters)
                filter.apply(sequence);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence after filters: " + sequence);
            }
        }
        return sequences;
    } finally {
        MONITOR.endTask("tagSentence");
    }
}
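The closing loop above illustrates the standard top-K extraction from a heap: poll() repeatedly until the heap is empty or K results have been taken. As a stand-alone sketch (topK and the sample strings are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class TopKExample {
    // Drain at most k elements from the heap, in priority order
    static <E> List<E> topK(PriorityQueue<E> heap, int k) {
        List<E> results = new ArrayList<E>();
        while (!heap.isEmpty() && results.size() < k) {
            results.add(heap.poll());
        }
        return results;
    }

    public static void main(String[] args) {
        PriorityQueue<String> heap = new PriorityQueue<String>();
        heap.add("delta");
        heap.add("alpha");
        heap.add("charlie");
        heap.add("bravo");
        System.out.println(topK(heap, 2)); // [alpha, bravo]
    }
}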
From source file:com.joliciel.csvLearner.features.BestFeatureFinder.java
public List<NameValuePair> getBestFeatures(GenericEvents events, String testOutcome, int featureCount) {
    LOG.debug("testOutcome: " + testOutcome);
    List<NameValuePair> bestFeatures = new ArrayList<NameValuePair>();
    RealValueFeatureEvaluator evaluator = new RealValueFeatureEvaluator();
    evaluator.setFeatureSplitter(featureSplitter);
    try {
        Set<String> features = events.getFeatures();
        PriorityQueue<NameValuePair> heap = new PriorityQueue<NameValuePair>(features.size(),
                new NameValueDescendingComparator());
        double eventSpaceEntropy = -1;
        for (String feature : features) {
            List<Double> featureEntropies = evaluator.evaluateFeature(events, feature, testOutcome);
            double informationGain = featureEntropies.get(0)
                    - featureEntropies.get(featureEntropies.size() - 1);
            if (eventSpaceEntropy < 0)
                eventSpaceEntropy = featureEntropies.get(0);
            NameValuePair pair = new NameValuePair(feature, informationGain);
            heap.add(pair);
        }
        bestFeatures.add(new NameValuePair(TOTAL_ENTROPY, eventSpaceEntropy));
        for (int i = 0; i < featureCount; i++) {
            NameValuePair pair = heap.poll();
            if (pair == null)
                break;
            LOG.debug("feature: " + pair.getName() + ", " + pair.getValue());
            bestFeatures.add(pair);
        }
        heap = null;
    } finally {
        evaluator.logPerformance();
    }
    return bestFeatures;
}
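Two poll() details do the work here: the descending comparator turns the PriorityQueue into a max-heap, so the highest information gain comes out first, and poll() returns null once the heap is exhausted, which doubles as the loop's early exit. A minimal sketch of the same pattern, with illustrative values:

import java.util.Comparator;
import java.util.PriorityQueue;

public class MaxHeapExample {
    public static void main(String[] args) {
        // Reverse the natural ordering so poll() yields the largest value first
        PriorityQueue<Double> gains = new PriorityQueue<Double>(16, Comparator.<Double>reverseOrder());
        gains.add(0.12);
        gains.add(0.55);
        gains.add(0.31);

        // Take up to 5 results; poll() returning null signals the heap ran out early
        for (int i = 0; i < 5; i++) {
            Double gain = gains.poll();
            if (gain == null)
                break;
            System.out.println(gain); // 0.55, 0.31, 0.12
        }
    }
}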
From source file:eu.stratosphere.pact.test.util.TestBase.java
/**
 * Compares the expected result string and the file(s) in the HDFS line by line.
 * Both results (expected and computed) are held in memory. Hence, this
 * method should not be used to compare large results.
 *
 * The line comparator is used to compare lines from the expected and result set.
 *
 * @param expectedResultStr the expected result
 * @param resultPath the HDFS path of the computed result
 * @param comp the line comparator
 */
protected void compareResultsByLinesInMemory(String expectedResultStr, String resultPath,
        Comparator<String> comp) throws Exception {
    ArrayList<String> resultFiles = new ArrayList<String>();
    // Determine all result files
    if (getFilesystemProvider().isDir(resultPath)) {
        for (String file : getFilesystemProvider().listFiles(resultPath)) {
            if (!getFilesystemProvider().isDir(file)) {
                resultFiles.add(resultPath + "/" + file);
            }
        }
    } else {
        resultFiles.add(resultPath);
    }
    // collect lines of all result files
    PriorityQueue<String> computedResult = new PriorityQueue<String>();
    for (String resultFile : resultFiles) {
        // read each result file
        InputStream is = getFilesystemProvider().getInputStream(resultFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();
        // collect lines
        while (line != null) {
            computedResult.add(line);
            line = reader.readLine();
        }
        reader.close();
    }
    PriorityQueue<String> expectedResult = new PriorityQueue<String>();
    StringTokenizer st = new StringTokenizer(expectedResultStr, "\n");
    while (st.hasMoreElements()) {
        expectedResult.add(st.nextToken());
    }
    // log expected and computed results
    if (LOG.isDebugEnabled()) {
        LOG.debug("Expected: " + expectedResult);
        LOG.debug("Computed: " + computedResult);
    }
    Assert.assertEquals("Computed and expected results have different size", expectedResult.size(),
            computedResult.size());
    while (!expectedResult.isEmpty()) {
        String expectedLine = expectedResult.poll();
        String computedLine = computedResult.poll();
        if (LOG.isDebugEnabled())
            LOG.debug("expLine: <" + expectedLine + ">\t\t: compLine: <" + computedLine + ">");
        Assert.assertEquals("Computed and expected lines differ", expectedLine, computedLine);
    }
}
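Feeding both line sets into PriorityQueues and polling them in lockstep compares the two as sorted multisets, independent of file order. Note that printing a queue directly (as the debug logging above does) shows internal heap order, not sorted order; only successive poll() calls are sorted. A minimal sketch of the comparison idiom, with illustrative data:

import java.util.Arrays;
import java.util.PriorityQueue;

public class SortedCompareExample {
    // Returns true if both collections contain the same lines, ignoring order
    static boolean sameLines(Iterable<String> a, Iterable<String> b) {
        PriorityQueue<String> qa = new PriorityQueue<String>();
        PriorityQueue<String> qb = new PriorityQueue<String>();
        for (String s : a)
            qa.add(s);
        for (String s : b)
            qb.add(s);
        if (qa.size() != qb.size())
            return false;
        while (!qa.isEmpty()) {
            // poll() pops the smallest remaining line from each heap
            if (!qa.poll().equals(qb.poll()))
                return false;
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(sameLines(Arrays.asList("b", "a"), Arrays.asList("a", "b"))); // true
        System.out.println(sameLines(Arrays.asList("a", "a"), Arrays.asList("a", "b"))); // false
    }
}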
From source file:org.apache.accumulo.core.file.blockfile.cache.LruBlockCache.java
/**
 * Eviction method.
 */
void evict() {
    // Ensure only one eviction at a time
    if (!evictionLock.tryLock())
        return;
    try {
        evictionInProgress = true;
        long bytesToFree = size.get() - minSize();
        LOG.debug("Block cache LRU eviction started. Attempting to free " + bytesToFree + " bytes");
        if (bytesToFree <= 0)
            return;
        // Instantiate priority buckets
        BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize, singleSize());
        BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize, multiSize());
        BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize, memorySize());
        // Scan entire map putting into appropriate buckets
        for (CachedBlock cachedBlock : map.values()) {
            switch (cachedBlock.getPriority()) {
            case SINGLE: {
                bucketSingle.add(cachedBlock);
                break;
            }
            case MULTI: {
                bucketMulti.add(cachedBlock);
                break;
            }
            case MEMORY: {
                bucketMemory.add(cachedBlock);
                break;
            }
            }
        }
        PriorityQueue<BlockBucket> bucketQueue = new PriorityQueue<BlockBucket>(3);
        bucketQueue.add(bucketSingle);
        bucketQueue.add(bucketMulti);
        bucketQueue.add(bucketMemory);
        int remainingBuckets = 3;
        long bytesFreed = 0;
        BlockBucket bucket;
        while ((bucket = bucketQueue.poll()) != null) {
            long overflow = bucket.overflow();
            if (overflow > 0) {
                long bucketBytesToFree = Math.min(overflow,
                        (long) Math.ceil((bytesToFree - bytesFreed) / (double) remainingBuckets));
                bytesFreed += bucket.free(bucketBytesToFree);
            }
            remainingBuckets--;
        }
        float singleMB = ((float) bucketSingle.totalSize()) / ((float) (1024 * 1024));
        float multiMB = ((float) bucketMulti.totalSize()) / ((float) (1024 * 1024));
        float memoryMB = ((float) bucketMemory.totalSize()) / ((float) (1024 * 1024));
        LOG.debug("Block cache LRU eviction completed. " + "Freed " + bytesFreed + " bytes. "
                + "Priority Sizes: " + "Single=" + singleMB + "MB (" + bucketSingle.totalSize() + "), "
                + "Multi=" + multiMB + "MB (" + bucketMulti.totalSize() + ")," + "Memory=" + memoryMB
                + "MB (" + bucketMemory.totalSize() + ")");
    } finally {
        stats.evict();
        evictionInProgress = false;
        evictionLock.unlock();
    }
}
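The while ((bucket = bucketQueue.poll()) != null) loop is worth calling out: because poll() returns null on an empty queue, the assignment inside the condition both fetches the smallest remaining bucket and terminates the loop. A tiny illustration with hypothetical byte counts:

import java.util.PriorityQueue;

public class DrainExample {
    public static void main(String[] args) {
        PriorityQueue<Long> bucketSizes = new PriorityQueue<Long>();
        bucketSizes.add(300L);
        bucketSizes.add(100L);
        bucketSizes.add(200L);

        // Drain smallest-first; poll() returning null ends the loop
        Long size;
        while ((size = bucketSizes.poll()) != null) {
            System.out.println("freeing bucket of " + size + " bytes");
        }
    }
}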
From source file:com.joliciel.talismane.tokeniser.patterns.IntervalPatternTokeniser.java
@Override
public List<TokenisedAtomicTokenSequence> tokeniseWithDecisions(Sentence sentence) {
    MONITOR.startTask("tokeniseWithDecisions");
    try {
        // apply any pre-tokenisation decisions via filters
        // we only want one placeholder per start index - the first one that gets added
        Map<Integer, TokenPlaceholder> placeholderMap = new HashMap<Integer, TokenPlaceholder>();
        for (TokenFilter tokenFilter : this.tokenFilters) {
            Set<TokenPlaceholder> myPlaceholders = tokenFilter.apply(sentence.getText());
            for (TokenPlaceholder placeholder : myPlaceholders) {
                if (!placeholderMap.containsKey(placeholder.getStartIndex())) {
                    placeholderMap.put(placeholder.getStartIndex(), placeholder);
                }
            }
            if (LOG.isTraceEnabled()) {
                if (myPlaceholders.size() > 0) {
                    LOG.trace("TokenFilter: " + tokenFilter);
                    LOG.trace("placeholders: " + myPlaceholders);
                }
            }
        }
        Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(placeholderMap.values());
        // Initially, separate the sentence into tokens using the separators provided
        TokenSequence tokenSequence = this.tokeniserService.getTokenSequence(sentence, Tokeniser.SEPARATORS,
                placeholders);
        // apply any pre-processing filters that have been added
        for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
            tokenSequenceFilter.apply(tokenSequence);
        }
        // Assign each separator its default value
        List<TokeniserOutcome> defaultOutcomes = this.tokeniserPatternManager.getDefaultOutcomes(tokenSequence);
        List<Decision<TokeniserOutcome>> defaultDecisions = new ArrayList<Decision<TokeniserOutcome>>(
                defaultOutcomes.size());
        for (TokeniserOutcome outcome : defaultOutcomes) {
            Decision<TokeniserOutcome> tokeniserDecision = this.tokeniserDecisionFactory
                    .createDefaultDecision(outcome);
            tokeniserDecision.addAuthority("_" + this.getClass().getSimpleName());
            tokeniserDecision.addAuthority("_" + "DefaultDecision");
            defaultDecisions.add(tokeniserDecision);
        }
        List<TokenisedAtomicTokenSequence> sequences = null;
        // For each test pattern, see if anything in the sentence matches it
        if (this.decisionMaker != null) {
            Set<Token> tokensToCheck = new HashSet<Token>();
            MONITOR.startTask("pattern matching");
            try {
                for (TokenPattern parsedPattern : this.getTokeniserPatternManager().getParsedTestPatterns()) {
                    Set<Token> tokensToCheckForThisPattern = new HashSet<Token>();
                    List<TokenPatternMatchSequence> matchesForThisPattern = parsedPattern.match(tokenSequence);
                    for (TokenPatternMatchSequence tokenPatternMatch : matchesForThisPattern) {
                        if (LOG.isTraceEnabled())
                            tokensToCheckForThisPattern.addAll(tokenPatternMatch.getTokensToCheck());
                        tokensToCheck.addAll(tokenPatternMatch.getTokensToCheck());
                    }
                    if (LOG.isTraceEnabled()) {
                        if (tokensToCheckForThisPattern.size() > 0) {
                            LOG.trace("Parsed pattern: " + parsedPattern);
                            LOG.trace("tokensToCheck: " + tokensToCheckForThisPattern);
                        }
                    }
                }
            } finally {
                MONITOR.endTask("pattern matching");
            }
            // we want to create the n most likely token sequences
            // the sequence has to correspond to a token pattern
            // initially create a heap with a single, empty sequence
            PriorityQueue<TokenisedAtomicTokenSequence> heap = new PriorityQueue<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence emptySequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            heap.add(emptySequence);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Token : \"" + token.getText() + "\"");
                }
                // build a new heap for this iteration
                PriorityQueue<TokenisedAtomicTokenSequence> previousHeap = heap;
                heap = new PriorityQueue<TokenisedAtomicTokenSequence>();
                // limit the heap breadth to K
                int maxSequences = previousHeap.size() > this.getBeamWidth() ? this.getBeamWidth()
                        : previousHeap.size();
                for (int j = 0; j < maxSequences; j++) {
                    TokenisedAtomicTokenSequence history = previousHeap.poll();
                    // Find the separating & non-separating decisions
                    List<Decision<TokeniserOutcome>> decisions = null;
                    if (tokensToCheck.contains(token)) {
                        // test the features on the current token
                        TokeniserContext context = new TokeniserContext(token, history);
                        List<FeatureResult<?>> tokenFeatureResults = new ArrayList<FeatureResult<?>>();
                        MONITOR.startTask("analyse features");
                        try {
                            for (TokeniserContextFeature<?> feature : tokeniserContextFeatures) {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(context, env);
                                if (featureResult != null) {
                                    tokenFeatureResults.add(featureResult);
                                }
                            }
                            if (LOG.isTraceEnabled()) {
                                for (FeatureResult<?> featureResult : tokenFeatureResults) {
                                    LOG.trace(featureResult.toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("analyse features");
                        }
                        MONITOR.startTask("make decision");
                        try {
                            decisions = this.decisionMaker.decide(tokenFeatureResults);
                            for (ClassificationObserver<TokeniserOutcome> observer : this.observers)
                                observer.onAnalyse(token, tokenFeatureResults, decisions);
                            for (Decision<TokeniserOutcome> decision : decisions) {
                                decision.addAuthority(this.getClass().getSimpleName());
                                for (TokenPatternMatch tokenMatch : token.getMatches()) {
                                    decision.addAuthority(tokenMatch.getPattern().toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("make decision");
                        }
                    } else {
                        decisions = new ArrayList<Decision<TokeniserOutcome>>();
                        decisions.add(defaultDecisions.get(i));
                    }
                    MONITOR.startTask("heap sort");
                    try {
                        for (Decision<TokeniserOutcome> decision : decisions) {
                            TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService
                                    .getTaggedToken(token, decision);
                            TokenisedAtomicTokenSequence tokenisedSequence = this.getTokeniserService()
                                    .getTokenisedAtomicTokenSequence(history);
                            tokenisedSequence.add(taggedToken);
                            if (decision.isStatistical())
                                tokenisedSequence.addDecision(decision);
                            heap.add(tokenisedSequence);
                        }
                    } finally {
                        MONITOR.endTask("heap sort");
                    }
                } // next sequence in the old heap
                i++;
            } // next token
            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            i = 0;
            while (!heap.isEmpty()) {
                sequences.add(heap.poll());
                i++;
                if (i >= this.getBeamWidth())
                    break;
            }
        } else {
            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                        defaultDecisions.get(i++));
                defaultSequence.add(taggedToken);
            }
            sequences.add(defaultSequence);
        } // have decision maker?
        LOG.debug("####Final token sequences:");
        int j = 1;
        for (TokenisedAtomicTokenSequence sequence : sequences) {
            TokenSequence newTokenSequence = sequence.inferTokenSequence();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Token sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Atomic sequence: " + sequence);
                LOG.debug("Resulting sequence: " + newTokenSequence);
            }
            // need to re-apply the pre-processing filters, because the tokens are all new
            // Question: why can't we conserve the initial tokens when they haven't changed at all?
            // Answer: because the tokenSequence and index in the sequence is referenced by the token.
            // Question: should we create a separate class, Token and TokenInSequence,
            // one with index & sequence access & one without?
            for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
                tokenSequenceFilter.apply(newTokenSequence);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("After filters: " + newTokenSequence);
            }
        }
        return sequences;
    } finally {
        MONITOR.endTask("tokeniseWithDecisions");
    }
}
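Unlike the TreeMap-of-heaps examples above, this tokeniser advances every hypothesis by exactly one token per iteration, so two heaps suffice: the previous iteration's and a fresh one. The swap-and-poll idiom reduces to a few lines; the string concatenation below is an illustrative stand-in for real hypothesis extension:

import java.util.PriorityQueue;

public class HeapSwapExample {
    public static void main(String[] args) {
        PriorityQueue<String> heap = new PriorityQueue<String>();
        heap.add(""); // single empty hypothesis
        int beamWidth = 2;
        String[] tokens = { "a", "b", "c" };

        for (String token : tokens) {
            // keep a handle on the old heap and start a fresh one for this iteration
            PriorityQueue<String> previousHeap = heap;
            heap = new PriorityQueue<String>();
            int maxSequences = Math.min(previousHeap.size(), beamWidth);
            for (int j = 0; j < maxSequences; j++) {
                String history = previousHeap.poll();
                // extend each surviving hypothesis with two variants of the current token
                heap.add(history + token);
                heap.add(history + token.toUpperCase());
            }
        }
        // under String's natural ordering, the "best" hypothesis is the smallest
        System.out.println(heap.poll());
    }
}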
From source file:edu.snu.leader.hierarchy.simple.test.TestIndividual.java
/**
 * Initialize the individual
 *
 * @param allIndividuals
 */
public void initialize(List<TestIndividual> allIndividuals) {
    // Basically, we just need to find our neighbors
    // Build a priority queue to sort things for us
    PriorityQueue<TestNeighbor> sortedNeighbors = new PriorityQueue<TestNeighbor>();
    // Iterate through all the individuals
    Iterator<TestIndividual> indIter = allIndividuals.iterator();
    while (indIter.hasNext()) {
        // Get the individual
        TestIndividual ind = indIter.next();
        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }
        // Build a neighbor out of it and put it in the queue
        TestNeighbor neighbor = new TestNeighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }
    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }
}
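Polling the first k entries off a distance-ordered heap is a straightforward k-nearest-neighbour selection. A self-contained sketch using points on a line (the data and k are illustrative):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class NearestExample {
    public static void main(String[] args) {
        final double query = 5.0;
        double[] points = { 1.0, 4.5, 9.0, 5.2, 7.7 };

        // Min-heap ordered by distance to the query point
        PriorityQueue<Double> byDistance = new PriorityQueue<Double>(points.length,
                Comparator.comparingDouble(p -> Math.abs(p - query)));
        for (double p : points)
            byDistance.add(p);

        // The k closest points come off the heap first
        int k = Math.min(2, byDistance.size());
        List<Double> nearest = new ArrayList<Double>();
        for (int i = 0; i < k; i++)
            nearest.add(byDistance.poll());
        System.out.println(nearest); // [5.2, 4.5]
    }
}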
From source file:de.tu_berlin.dima.aim3.querysuggestion.QuerySuggTCase.java
/**
 * Print results from the HDFS
 *
 * @param resultPath
 */
protected void printResults(String resultPath) throws Exception {
    ArrayList<String> resultFiles = new ArrayList<String>();
    // Determine all result files
    if (getFilesystemProvider().isDir(resultPath)) {
        for (String file : getFilesystemProvider().listFiles(resultPath)) {
            if (!getFilesystemProvider().isDir(file)) {
                resultFiles.add(resultPath + "/" + file);
            }
        }
    } else {
        resultFiles.add(resultPath);
    }
    // collect lines of all result files
    PriorityQueue<String> computedResult = new PriorityQueue<String>();
    for (String resultFile : resultFiles) {
        // read each result file
        InputStream is = getFilesystemProvider().getInputStream(resultFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();
        // collect lines
        while (line != null) {
            computedResult.add(line);
            line = reader.readLine();
        }
        reader.close();
    }
    // Assert.assertEquals("Computed and expected results have different size",
    // expectedResult.size(), computedResult.size());
    System.out.println("RESULTS:");
    while (!computedResult.isEmpty()) {
        String computedLine = computedResult.poll();
        System.out.println(computedLine);
        // if (LOG.isDebugEnabled())
        // LOG.debug("compLine: <" + computedLine + ">");
        // System.out.println("compLine: <" + computedLine + ">");
        // Assert.assertEquals("Computed and expected lines differ",
        // expectedLine, computedLine);
    }
}
From source file:org.mule.util.store.MonitoredObjectStoreWrapper.java
public void expire() {
    try {
        final long now = System.nanoTime();
        List<Serializable> keys = allKeys();
        int excess = (allKeys().size() - maxEntries);
        if (maxEntries > 0 && excess > 0) {
            PriorityQueue<StoredObject<T>> q = new PriorityQueue<StoredObject<T>>(excess,
                    new Comparator<StoredObject<T>>() {
                        @Override
                        public int compare(StoredObject<T> paramT1, StoredObject<T> paramT2) {
                            return paramT2.timestamp.compareTo(paramT1.timestamp);
                        }
                    });
            long youngest = Long.MAX_VALUE;
            for (Serializable key : keys) {
                StoredObject<T> obj = getStore().retrieve(key);
                // TODO extract the entryTTL > 0 outside of loop
                if (entryTTL > 0 && TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                    remove(key);
                    excess--;
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                } else {
                    if (excess > 0 && (q.size() < excess || obj.timestamp < youngest)) {
                        q.offer(obj);
                        youngest = q.peek().timestamp;
                    }
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                }
            }
            for (int i = 0; i < excess; i++) {
                Serializable key = q.poll().key;
                remove(key);
            }
        } else {
            if (entryTTL > 0) {
                for (Serializable key : keys) {
                    StoredObject<T> obj = getStore().retrieve(key);
                    if (TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                        remove(key);
                    }
                }
            }
        }
    } catch (Exception e) {
        logger.warn("Running expiry on " + baseStore + " threw " + e + ":" + e.getMessage());
    }
}
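The offer-then-poll pairing above keeps the queue bounded while scanning: once the heap exceeds the number of entries being tracked, poll() evicts the head so only the candidates of interest remain. The same idea in its simplest form, a bounded min-heap that retains the k largest values (k and the data are illustrative):

import java.util.PriorityQueue;

public class BoundedHeapExample {
    public static void main(String[] args) {
        int k = 3;
        int[] data = { 5, 1, 9, 3, 7, 8, 2 };

        // Min-heap of size <= k: after each offer, poll() discards the smallest
        // element when the bound is exceeded, so the heap ends up holding the k largest
        PriorityQueue<Integer> heap = new PriorityQueue<Integer>(k + 1);
        for (int value : data) {
            heap.offer(value);
            if (heap.size() > k) {
                heap.poll();
            }
        }
        System.out.println(heap); // contains 7, 8, 9 (printed in heap order, not sorted)
    }
}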