List of usage examples for the java.util.PriorityQueue constructor PriorityQueue()
public PriorityQueue()
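Before the examples, a minimal, self-contained sketch (not taken from any of the source files below) of what the no-argument constructor gives you: a min-heap with the default initial capacity of 11 that orders elements by their natural ordering, so the element type must implement Comparable. Only poll() and peek() respect that ordering; iteration order over the queue is unspecified.

import java.util.PriorityQueue;

public class DefaultConstructorExample {
    public static void main(String[] args) {
        // No-arg constructor: natural ordering, default initial capacity (11)
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>();
        queue.add(42);
        queue.add(7);
        queue.add(19);

        // poll() always removes the smallest remaining element
        while (!queue.isEmpty()) {
            System.out.println(queue.poll()); // prints 7, 19, 42
        }
    }
}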
From source file: edu.snu.leader.hidden.SpatialIndividual.java

/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
public void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<SpatialIndividual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        SpatialIndividual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        Neighbor neighbor = sortedNeighbors.poll();
        _nearestNeighbors.add(neighbor);
        neighbor.getIndividual().signalNearestNeighborStatus(this);
        // _LOG.debug( "Nearest neighbor: id=["
        //         + getID()
        //         + "] neighbor=["
        //         + neighbor.getIndividual().getID()
        //         + "]" );
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}
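The example above relies on Neighbor implementing Comparable so the queue orders candidates by distance. A stripped-down sketch of the same "add all candidates, poll the k nearest" idiom, using a hypothetical Candidate class (the names below are illustrative, not taken from the source):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class NearestNeighborSketch {

    // Hypothetical candidate ordered by distance (smallest first)
    static class Candidate implements Comparable<Candidate> {
        final String id;
        final double distance;

        Candidate(String id, double distance) {
            this.id = id;
            this.distance = distance;
        }

        @Override
        public int compareTo(Candidate other) {
            return Double.compare(this.distance, other.distance);
        }
    }

    static List<Candidate> kNearest(List<Candidate> candidates, int k) {
        // Default constructor: the queue sorts by Candidate's natural ordering (distance)
        PriorityQueue<Candidate> sorted = new PriorityQueue<Candidate>();
        sorted.addAll(candidates);

        List<Candidate> nearest = new ArrayList<Candidate>();
        int count = Math.min(sorted.size(), k);
        for (int i = 0; i < count; i++) {
            nearest.add(sorted.poll()); // poll() returns the closest remaining candidate
        }
        return nearest;
    }
}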
From source file: org.onebusaway.uk.network_rail.gtfs_realtime.graph.PositionBerthToStanoxGraphMain.java

private void interpolateBerthLocations() {
    int index = 0;
    for (RawBerthNode rootNode : _berthNodesToLocations.keySet()) {
        if (index % 100 == 0) {
            _log.info("node=" + index + "/" + _berthNodesToLocations.keySet().size());
        }
        index++;
        Location fromLocation = _berthNodesToLocations.get(rootNode);
        Queue<OrderedRawBerthNode> queue = new PriorityQueue<OrderedRawBerthNode>();
        queue.add(new OrderedRawBerthNode(rootNode, null, 0.0));
        Map<RawBerthNode, RawBerthNode> parents = new HashMap<RawBerthNode, RawBerthNode>();
        Set<RawBerthNode> visited = new HashSet<RawBerthNode>();
        while (!queue.isEmpty()) {
            OrderedRawBerthNode currentNode = queue.poll();
            RawBerthNode node = currentNode.getNode();
            if (!visited.add(node)) {
                continue;
            }
            parents.put(node, currentNode.getParent());
            Location toLocation = _berthNodesToLocations.get(node);
            if (currentNode.getParent() != null && toLocation != null) {
                List<RawBerthNode> path = new ArrayList<RawBerthNode>();
                RawBerthNode last = node;
                while (last != null) {
                    path.add(last);
                    last = parents.get(last);
                }
                if (path.size() <= 2) {
                    break;
                }
                Collections.reverse(path);
                BerthPath berthPath = new BerthPath(path, currentNode.getDistance());
                double d = fromLocation.getDistance(toLocation);
                if (d > 30000) {
                    continue;
                }
                RailwayPath railwayPath = _railwayShapeService.getPath(fromLocation.getPoint(), toLocation.getPoint());
                if (railwayPath != null) {
                    snapBerthsToRailwayPath(berthPath, railwayPath);
                }
                break;
            } else {
                for (Map.Entry<RawBerthNode, List<Integer>> entry : node.getOutgoing().entrySet()) {
                    RawBerthNode outgoing = entry.getKey();
                    int avgDuration = RawNode.average(entry.getValue());
                    queue.add(new OrderedRawBerthNode(outgoing, node, currentNode.getDistance() + avgDuration));
                }
            }
        }
    }
}
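The traversal above is a Dijkstra-style best-first search: each queued node carries its accumulated cost, and the queue always hands back the cheapest unvisited node. A simplified sketch of that pattern with a hypothetical cost-ordered wrapper (the graph representation and names are illustrative, not the OneBusAway types):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;

public class BestFirstSketch {

    // Hypothetical wrapper ordering graph nodes by accumulated cost
    static class OrderedNode implements Comparable<OrderedNode> {
        final String node;
        final double cost;

        OrderedNode(String node, double cost) {
            this.node = node;
            this.cost = cost;
        }

        @Override
        public int compareTo(OrderedNode other) {
            return Double.compare(this.cost, other.cost);
        }
    }

    /** Returns the lowest accumulated cost from root to each reachable node. */
    static Map<String, Double> cheapestCosts(String root, Map<String, Map<String, Double>> edges) {
        PriorityQueue<OrderedNode> queue = new PriorityQueue<OrderedNode>();
        queue.add(new OrderedNode(root, 0.0));
        Map<String, Double> best = new HashMap<String, Double>();
        Set<String> visited = new HashSet<String>();

        while (!queue.isEmpty()) {
            OrderedNode current = queue.poll(); // cheapest unexpanded node
            if (!visited.add(current.node)) {
                continue; // already expanded with an equal or lower cost
            }
            best.put(current.node, current.cost);
            for (Map.Entry<String, Double> edge
                    : edges.getOrDefault(current.node, new HashMap<String, Double>()).entrySet()) {
                queue.add(new OrderedNode(edge.getKey(), current.cost + edge.getValue()));
            }
        }
        return best;
    }
}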
From source file: org.kuali.rice.krms.framework.engine.TermResolutionEngineImpl.java

/**
 * @param termName
 * @return List<{@link TermResolverKey}>
 */
protected List<TermResolverKey> buildTermResolutionPlan(String termName) {
    // our result
    List<TermResolverKey> resolutionPlan = null;

    // Holds the resolvers we've visited, along with the needed metadata for generating our final plan
    Map<TermResolverKey, Visited> visitedByKey = new HashMap<TermResolverKey, Visited>();

    // this holds a least cost first list of nodes remaining to be explored
    PriorityQueue<ToVisit> toVisits = new PriorityQueue<ToVisit>(); // nice grammar there cowboy

    // dummy resolver to be the root of this tree
    // Do I really need this? Yes, because there may be more than one resolver that resolves to the desired termName,
    // so this destination unifies the trees of those candidate resolvers
    TermResolver destination = createDestination(termName); // problem is we can't get this one out of the registry
    TermResolverKey destinationKey = new TermResolverKey(destination);

    LOG.debug("Beginning resolution tree search for " + termName);

    // seed our queue of resolvers to visit
    // need to be aware of null parent for root ToVisit
    toVisits.add(new ToVisit(0, destination, null));

    // there may not be a viable plan
    boolean plannedToDestination = false;

    // We'll do a modified Dijkstra's shortest path algorithm, where at each leaf we see if we've planned out
    // termName resolution all the way up to the root, our destination. If so, we just reconstruct our plan.
    while (!plannedToDestination && toVisits.size() > 0) {
        // visit least cost node remaining
        ToVisit visiting = toVisits.poll();

        LOG.debug("visiting " + visiting.getTermResolverKey());

        // the resolver is the edge in our tree -- we don't get it directly from the termResolversByKey Map,
        // because it could be our destination
        TermResolver resolver = getResolver(visiting.getTermResolverKey(), destination, destinationKey);
        TermResolver parent = getResolver(visiting.getParentKey(), destination, destinationKey);

        if (visitedByKey.containsKey(visiting.getTermResolverKey())) {
            continue; // We've already visited this one
        }

        Visited parentVisited = visitedByKey.get(visiting.getParentKey());

        if (resolver == null)
            throw new RuntimeException("Unable to get TermResolver by its key");

        Set<String> prereqs = resolver.getPrerequisites();
        // keep track of any prereqs that we already have handy
        List<String> metPrereqs = new LinkedList<String>();

        // see what prereqs we have already, and which we'll need to visit
        if (prereqs != null)
            for (String prereq : prereqs) {
                if (!termCache.containsKey(new Term(prereq, null))) {
                    // enqueue all resolvers in toVisits
                    List<TermResolver<?>> prereqResolvers = termResolversByOutput.get(prereq);
                    if (prereqResolvers != null)
                        for (TermResolver prereqResolver : prereqResolvers) {
                            // Only TermResolvers that don't take parameterized terms can be chained, so:
                            // if the TermResolver doesn't take parameters, or it resolves the output termName
                            if (CollectionUtils.isEmpty(prereqResolver.getParameterNames())
                                    || termName.equals(prereqResolver.getOutput())) {
                                // queue it up for visiting
                                toVisits.add(new ToVisit(visiting.getCost() /* cost to get to this resolver */,
                                        prereqResolver, resolver));
                            }
                        }
                } else {
                    metPrereqs.add(prereq);
                }
            }

        // Build visited info
        Visited visited = buildVisited(resolver, parentVisited, metPrereqs);
        visitedByKey.put(visited.getResolverKey(), visited);

        plannedToDestination = isPlannedBackToDestination(visited, destinationKey, visitedByKey);
    }

    if (plannedToDestination) {
        // build result from Visited tree.
        resolutionPlan = new LinkedList<TermResolverKey>();
        assembleLinearResolutionPlan(visitedByKey.get(destinationKey), visitedByKey, resolutionPlan);
    }
    return resolutionPlan;
}
From source file: org.apache.hadoop.tools.rumen.Folder.java

public int run() throws IOException {
    class JobEntryComparator implements Comparator<Pair<LoggedJob, JobTraceReader>> {
        public int compare(Pair<LoggedJob, JobTraceReader> p1, Pair<LoggedJob, JobTraceReader> p2) {
            LoggedJob j1 = p1.first();
            LoggedJob j2 = p2.first();
            return (j1.getSubmitTime() < j2.getSubmitTime()) ? -1
                    : (j1.getSubmitTime() == j2.getSubmitTime()) ? 0 : 1;
        }
    }

    // we initialize an empty heap so if we take an error before establishing
    // a real one the finally code goes through
    Queue<Pair<LoggedJob, JobTraceReader>> heap = new PriorityQueue<Pair<LoggedJob, JobTraceReader>>();

    try {
        LoggedJob job = reader.nextJob();

        if (job == null) {
            LOG.error("The job trace is empty");
            return EMPTY_JOB_TRACE;
        }

        // If starts-after time is specified, skip the number of jobs till we reach
        // the starting time limit.
        if (startsAfter > 0) {
            LOG.info("starts-after time is specified. Initial job submit time : " + job.getSubmitTime());

            long approximateTime = job.getSubmitTime() + startsAfter;
            job = reader.nextJob();
            long skippedCount = 0;
            while (job != null && job.getSubmitTime() < approximateTime) {
                job = reader.nextJob();
                skippedCount++;
            }

            LOG.debug("Considering jobs with submit time greater than " + startsAfter + " ms. Skipped "
                    + skippedCount + " jobs.");

            if (job == null) {
                LOG.error("No more jobs to process in the trace with 'starts-after'" + " set to "
                        + startsAfter + "ms.");
                return EMPTY_JOB_TRACE;
            }
            LOG.info("The first job has a submit time of " + job.getSubmitTime());
        }

        firstJobSubmitTime = job.getSubmitTime();
        long lastJobSubmitTime = firstJobSubmitTime;

        int numberJobs = 0;
        long currentIntervalEnd = Long.MIN_VALUE;

        Path nextSegment = null;
        Outputter<LoggedJob> tempGen = null;

        if (debug) {
            LOG.debug("The first job has a submit time of " + firstJobSubmitTime);
        }

        final Configuration conf = getConf();

        try {
            // At the top of this loop, skewBuffer has at most
            // skewBufferLength entries.
            while (job != null) {
                final Random tempNameGenerator = new Random();

                lastJobSubmitTime = job.getSubmitTime();
                ++numberJobs;

                if (job.getSubmitTime() >= currentIntervalEnd) {
                    if (tempGen != null) {
                        tempGen.close();
                    }

                    nextSegment = null;
                    for (int i = 0; i < 3 && nextSegment == null; ++i) {
                        try {
                            nextSegment = new Path(tempDir, "segment-" + tempNameGenerator.nextLong() + ".json.gz");

                            if (debug) {
                                LOG.debug("The next segment name is " + nextSegment);
                            }

                            FileSystem fs = nextSegment.getFileSystem(conf);

                            try {
                                if (!fs.exists(nextSegment)) {
                                    break;
                                }
                                continue;
                            } catch (IOException e) {
                                // no code -- file did not already exist
                            }
                        } catch (IOException e) {
                            // no code -- file exists now, or directory bad. We try three
                            // times.
                        }
                    }

                    if (nextSegment == null) {
                        throw new RuntimeException("Failed to create a new file!");
                    }

                    if (debug) {
                        LOG.debug("Creating " + nextSegment + " for a job with a submit time of "
                                + job.getSubmitTime());
                    }

                    deletees.add(nextSegment);
                    tempPaths.add(nextSegment);

                    tempGen = new DefaultOutputter<LoggedJob>();
                    tempGen.init(nextSegment, conf);

                    long currentIntervalNumber = (job.getSubmitTime() - firstJobSubmitTime) / inputCycle;
                    currentIntervalEnd = firstJobSubmitTime + ((currentIntervalNumber + 1) * inputCycle);
                }

                // the temp files contain unadjusted times, but each temp file's
                // content is in the same input cycle interval.
                if (tempGen != null) {
                    tempGen.output(job);
                }

                job = reader.nextJob();
            }
        } catch (DeskewedJobTraceReader.OutOfOrderException e) {
            return OUT_OF_ORDER_JOBS;
        } finally {
            if (tempGen != null) {
                tempGen.close();
            }
        }

        if (lastJobSubmitTime <= firstJobSubmitTime) {
            LOG.error("All of your job[s] have the same submit time." + " Please just use your input file.");
            return ALL_JOBS_SIMULTANEOUS;
        }

        double submitTimeSpan = lastJobSubmitTime - firstJobSubmitTime;

        LOG.warn("Your input trace spans " + (lastJobSubmitTime - firstJobSubmitTime) + " ticks.");

        double foldingRatio = submitTimeSpan * (numberJobs + 1) / numberJobs / inputCycle;

        if (debug) {
            LOG.warn("run: submitTimeSpan = " + submitTimeSpan + ", numberJobs = " + numberJobs
                    + ", inputCycle = " + inputCycle);
        }

        if (reader.neededSkewBufferSize() > 0) {
            LOG.warn("You needed a -skew-buffer-length of " + reader.neededSkewBufferSize()
                    + " but no more, for this input.");
        }

        double tProbability = timeDilation * concentration / foldingRatio;

        if (debug) {
            LOG.warn("run: timeDilation = " + timeDilation + ", concentration = " + concentration
                    + ", foldingRatio = " + foldingRatio);
            LOG.warn("The transcription probability is " + tProbability);
        }

        transcriptionRateInteger = (int) Math.floor(tProbability);
        transcriptionRateFraction = tProbability - Math.floor(tProbability);

        // Now read all the inputs in parallel
        heap = new PriorityQueue<Pair<LoggedJob, JobTraceReader>>(tempPaths.size(), new JobEntryComparator());

        for (Path tempPath : tempPaths) {
            JobTraceReader thisReader = new JobTraceReader(tempPath, conf);

            closees.add(thisReader);

            LoggedJob streamFirstJob = thisReader.getNext();

            long thisIndex = (streamFirstJob.getSubmitTime() - firstJobSubmitTime) / inputCycle;

            if (debug) {
                LOG.debug("A job with submit time of " + streamFirstJob.getSubmitTime() + " is in interval # "
                        + thisIndex);
            }

            adjustJobTimes(streamFirstJob);

            if (debug) {
                LOG.debug("That job's submit time is adjusted to " + streamFirstJob.getSubmitTime());
            }

            heap.add(new Pair<LoggedJob, JobTraceReader>(streamFirstJob, thisReader));
        }

        Pair<LoggedJob, JobTraceReader> next = heap.poll();

        while (next != null) {
            maybeOutput(next.first());

            if (debug) {
                LOG.debug("The most recent job has an adjusted submit time of " + next.first().getSubmitTime());
                LOG.debug(" Its replacement in the heap will come from input engine " + next.second());
            }

            LoggedJob replacement = next.second().getNext();

            if (replacement == null) {
                next.second().close();

                if (debug) {
                    LOG.debug("That input engine is depleted.");
                }
            } else {
                adjustJobTimes(replacement);

                if (debug) {
                    LOG.debug("The replacement has an adjusted submit time of " + replacement.getSubmitTime());
                }

                heap.add(new Pair<LoggedJob, JobTraceReader>(replacement, next.second()));
            }

            next = heap.poll();
        }
    } finally {
        IOUtils.cleanup(null, reader);

        if (outGen != null) {
            outGen.close();
        }

        for (Pair<LoggedJob, JobTraceReader> heapEntry : heap) {
            heapEntry.second().close();
        }

        for (Closeable closee : closees) {
            closee.close();
        }

        if (!debug) {
            Configuration conf = getConf();

            for (Path deletee : deletees) {
                FileSystem fs = deletee.getFileSystem(conf);

                try {
                    fs.delete(deletee, false);
                } catch (IOException e) {
                    // no code
                }
            }
        }
    }

    return 0;
}
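The second half of the method is a k-way merge: the heap holds the head job of each segment stream, the smallest submit time is polled, and the heap is refilled from the stream that job came from (note that this part uses the capacity-and-comparator constructor rather than the no-argument one). A minimal sketch of that merge pattern over plain iterators; the Head holder and method names below are illustrative, not Rumen's types:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeSketch {

    // Hypothetical holder pairing the head element of a stream with the stream itself
    static class Head<T> {
        final T value;
        final Iterator<T> source;

        Head(T value, Iterator<T> source) {
            this.value = value;
            this.source = source;
        }
    }

    /** Merges already-sorted iterators into one sorted list. */
    static <T> List<T> merge(List<Iterator<T>> sources, Comparator<T> order) {
        PriorityQueue<Head<T>> heap = new PriorityQueue<Head<T>>(Math.max(1, sources.size()),
                (a, b) -> order.compare(a.value, b.value));
        for (Iterator<T> source : sources) {
            if (source.hasNext()) {
                heap.add(new Head<T>(source.next(), source));
            }
        }

        List<T> merged = new ArrayList<T>();
        Head<T> next = heap.poll();
        while (next != null) {
            merged.add(next.value);
            if (next.source.hasNext()) {
                heap.add(new Head<T>(next.source.next(), next.source)); // refill from the same stream
            }
            next = heap.poll();
        }
        return merged;
    }
}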
From source file: de.tu_berlin.dima.aim3.querysuggestion.QuerySuggTCase.java

/**
 * Print results from the HDFS
 *
 * @param resultPath
 */
protected void printResults(String resultPath) throws Exception {
    ArrayList<String> resultFiles = new ArrayList<String>();

    // Determine all result files
    if (getFilesystemProvider().isDir(resultPath)) {
        for (String file : getFilesystemProvider().listFiles(resultPath)) {
            if (!getFilesystemProvider().isDir(file)) {
                resultFiles.add(resultPath + "/" + file);
            }
        }
    } else {
        resultFiles.add(resultPath);
    }

    // collect lines of all result files
    PriorityQueue<String> computedResult = new PriorityQueue<String>();
    for (String resultFile : resultFiles) {
        // read each result file
        InputStream is = getFilesystemProvider().getInputStream(resultFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();

        // collect lines
        while (line != null) {
            computedResult.add(line);
            line = reader.readLine();
        }
        reader.close();
    }

    // Assert.assertEquals("Computed and expected results have different size",
    //         expectedResult.size(), computedResult.size());

    System.out.println("RESULTS:");
    while (!computedResult.isEmpty()) {
        String computedLine = computedResult.poll();
        System.out.println(computedLine);
        // if (LOG.isDebugEnabled())
        //     LOG.debug("compLine: <" + computedLine + ">");
        // System.out.println("compLine: <" + computedLine + ">");
        // Assert.assertEquals("Computed and expected lines differ",
        //         expectedLine, computedLine);
    }
}
From source file: com.android.switchaccess.HuffmanTreeBuilder.java

/**
 * Creates a HuffmanNode for each of the nodes in the {@code windowRoot}. The HuffmanNode
 * internally keeps track of the probability for each of these nodes. Finally, all the
 * HuffmanNodes are added to a priority queue to keep them sorted in ascending order based
 * on their probabilities.
 *
 * @param userContext The actions the user has taken so far. In case of an IME, this would be
 *        what the user has typed so far.
 * @param windowRoot The root of the tree of SwitchAccessNodeCompats
 * @return Returns a PriorityQueue which contains all the HuffmanNodes in ascending order based on
 *         their probabilities. If the {@code windowRoot} contains no clickable nodes, an empty
 *         queue is returned.
 */
private PriorityQueue<HuffmanNode> getOptionScanNodeProbabilities(String userContext,
        SwitchAccessNodeCompat windowRoot) {
    LinkedList<SwitchAccessNodeCompat> talkBackOrderList = TreeBuilderUtils.getNodesInTalkBackOrder(windowRoot);
    Set<SwitchAccessNodeCompat> talkBackOrderSet = new HashSet<>(talkBackOrderList);
    Map<SwitchAccessNodeCompat, Double> probabilityDistribution = mProbabilityModelReader
            .getProbabilityDistribution(userContext, talkBackOrderSet);
    PriorityQueue<HuffmanNode> optionScanNodeProbabilities = new PriorityQueue<>();
    for (SwitchAccessNodeCompat currentNode : talkBackOrderSet) {
        Double currentNodeProbability = probabilityDistribution.get(currentNode);
        List<AccessibilityNodeActionNode> currentNodeActions = TreeBuilderUtils.getCompatActionNodes(currentNode);
        /* TODO(rmorina): need to think about the correct behaviour when there are more
         * than one actions associated with a node */
        if (currentNodeActions.size() == 1) {
            optionScanNodeProbabilities.add(new HuffmanNode(currentNodeActions.get(0), currentNodeProbability));
        }
        currentNode.recycle();
    }
    return optionScanNodeProbabilities;
}
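The method above only builds the probability-ordered queue; the reason for the ascending ordering is the classic Huffman combine step, where the two least-probable nodes are repeatedly polled, merged, and re-added until a single root remains. A schematic sketch of that loop using a hypothetical binary Node type (a stand-in, not the Switch Access HuffmanNode):

import java.util.PriorityQueue;

public class HuffmanCombineSketch {

    // Hypothetical node ordered by probability (least probable first)
    static class Node implements Comparable<Node> {
        final double probability;
        final Node left;   // null for leaves
        final Node right;  // null for leaves

        Node(double probability, Node left, Node right) {
            this.probability = probability;
            this.left = left;
            this.right = right;
        }

        @Override
        public int compareTo(Node other) {
            return Double.compare(this.probability, other.probability);
        }
    }

    /** Collapses the queue into a single Huffman root, or returns null if the queue is empty. */
    static Node buildTree(PriorityQueue<Node> nodes) {
        while (nodes.size() > 1) {
            Node first = nodes.poll();   // least probable
            Node second = nodes.poll();  // second least probable
            nodes.add(new Node(first.probability + second.probability, first, second));
        }
        return nodes.poll();
    }
}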
From source file: sg.atom.utils._commons.lang.metadata.ScoringClassMapBuilder.java

public ClassMapBuilder<A, B> byDefault(MappingDirection direction, DefaultFieldMapper... withDefaults) {

    DefaultFieldMapper[] defaults;
    if (withDefaults.length == 0) {
        defaults = getDefaultFieldMappers();
    } else {
        defaults = withDefaults;
    }

    /*
     * For our custom 'byDefault' method, we're going to try and match
     * fields by their Levenshtein distance
     */
    PriorityQueue<FieldMatchScore> matchScores = new PriorityQueue<FieldMatchScore>();

    Map<String, Property> propertiesForA = getPropertyExpressions(getAType());
    Map<String, Property> propertiesForB = getPropertyExpressions(getBType());

    for (final Entry<String, Property> propertyA : propertiesForA.entrySet()) {
        if (!propertyA.getValue().getName().equals("class")) {
            for (final Entry<String, Property> propertyB : propertiesForB.entrySet()) {
                if (!propertyB.getValue().getName().equals("class")) {
                    FieldMatchScore matchScore = new FieldMatchScore(propertyA.getValue(), propertyB.getValue(),
                            matchingWeights);
                    matchScores.add(matchScore);
                }
            }
        }
    }

    Set<String> unmatchedFields = new LinkedHashSet<String>(this.getPropertiesForTypeA());
    unmatchedFields.remove("class");

    for (FieldMatchScore score : matchScores) {
        if (!this.getMappedPropertiesForTypeA().contains(score.propertyA.getExpression())
                && !this.getMappedPropertiesForTypeB().contains(score.propertyB.getExpression())) {
            if (LOGGER.isTraceEnabled()) {
                LOGGER.trace("\n" + score.toString());
            }
            if (score.meetsMinimumScore()) {
                fieldMap(score.propertyA.getExpression(), score.propertyB.getExpression()).direction(direction).add();
                unmatchedFields.remove(score.propertyA.getExpression());
            }
        }
    }

    /*
     * Apply any default field mappers to the unmapped fields
     */
    for (String propertyNameA : unmatchedFields) {
        Property prop = resolvePropertyForA(propertyNameA);
        for (DefaultFieldMapper defaulter : defaults) {
            String suggestion = defaulter.suggestMappedField(propertyNameA, prop.getType());
            if (suggestion != null && getPropertiesForTypeB().contains(suggestion)) {
                if (!getMappedPropertiesForTypeB().contains(suggestion)) {
                    fieldMap(propertyNameA, suggestion).direction(direction).add();
                }
            }
        }
    }

    return this;
}
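One caveat worth noting about the loop above: iterating a PriorityQueue with for-each, as `for (FieldMatchScore score : matchScores)` does, is not guaranteed to visit elements in priority order; the JDK only promises ordered retrieval through poll() and peek(). A small illustration of the difference, using plain integers:

import java.util.PriorityQueue;

public class IterationOrderCaveat {
    public static void main(String[] args) {
        PriorityQueue<Integer> queue = new PriorityQueue<Integer>();
        queue.add(30);
        queue.add(10);
        queue.add(20);

        // For-each uses the queue's iterator: order reflects the heap layout, not sorted order
        for (Integer value : queue) {
            System.out.println("iterated: " + value);
        }

        // poll() is the way to consume elements in priority order
        while (!queue.isEmpty()) {
            System.out.println("polled: " + queue.poll()); // 10, 20, 30
        }
    }
}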
From source file: eu.stratosphere.pact.test.util.TestBase.java

/**
 * Compares the expectedResultString and the file(s) in the HDFS linewise.
 * Both results (expected and computed) are held in memory. Hence, this
 * method should not be used to compare large results.
 *
 * The line comparator is used to compare lines from the expected and result set.
 *
 * @param expectedResultStr
 * @param resultPath
 * @param comp Line comparator
 */
protected void compareResultsByLinesInMemory(String expectedResultStr, String resultPath,
        Comparator<String> comp) throws Exception {
    ArrayList<String> resultFiles = new ArrayList<String>();

    // Determine all result files
    if (getFilesystemProvider().isDir(resultPath)) {
        for (String file : getFilesystemProvider().listFiles(resultPath)) {
            if (!getFilesystemProvider().isDir(file)) {
                resultFiles.add(resultPath + "/" + file);
            }
        }
    } else {
        resultFiles.add(resultPath);
    }

    // collect lines of all result files
    PriorityQueue<String> computedResult = new PriorityQueue<String>();
    for (String resultFile : resultFiles) {
        // read each result file
        InputStream is = getFilesystemProvider().getInputStream(resultFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();

        // collect lines
        while (line != null) {
            computedResult.add(line);
            line = reader.readLine();
        }
        reader.close();
    }

    PriorityQueue<String> expectedResult = new PriorityQueue<String>();
    StringTokenizer st = new StringTokenizer(expectedResultStr, "\n");
    while (st.hasMoreElements()) {
        expectedResult.add(st.nextToken());
    }

    // log expected and computed results
    if (LOG.isDebugEnabled()) {
        LOG.debug("Expected: " + expectedResult);
        LOG.debug("Computed: " + computedResult);
    }

    Assert.assertEquals("Computed and expected results have different size", expectedResult.size(),
            computedResult.size());

    while (!expectedResult.isEmpty()) {
        String expectedLine = expectedResult.poll();
        String computedLine = computedResult.poll();

        if (LOG.isDebugEnabled())
            LOG.debug("expLine: <" + expectedLine + ">\t\t: compLine: <" + computedLine + ">");

        Assert.assertEquals("Computed and expected lines differ", expectedLine, computedLine);
    }
}
From source file: com.trk.aboutme.facebook.internal.FileLruCache.java

private void trim() {
    try {
        Logger.log(LoggingBehavior.CACHE, TAG, "trim started");
        PriorityQueue<ModifiedFile> heap = new PriorityQueue<ModifiedFile>();
        long size = 0;
        long count = 0;
        for (File file : this.directory.listFiles(BufferFile.excludeBufferFiles())) {
            ModifiedFile modified = new ModifiedFile(file);
            heap.add(modified);
            Logger.log(LoggingBehavior.CACHE, TAG, " trim considering time="
                    + Long.valueOf(modified.getModified()) + " name=" + modified.getFile().getName());
            size += file.length();
            count++;
        }

        while ((size > limits.getByteCount()) || (count > limits.getFileCount())) {
            File file = heap.remove().getFile();
            Logger.log(LoggingBehavior.CACHE, TAG, " trim removing " + file.getName());
            size -= file.length();
            count--;
            file.delete();
        }
    } finally {
        synchronized (lock) {
            isTrimPending = false;
            lock.notifyAll();
        }
    }
}
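The trim above works because ModifiedFile presumably orders files by last-modified time, so the heap always yields the oldest file first while the size and count limits are exceeded. A minimal sketch of that oldest-first eviction idiom with a hypothetical wrapper (illustrative names, not the Facebook SDK's types):

import java.io.File;
import java.util.PriorityQueue;

public class OldestFirstEvictionSketch {

    // Hypothetical wrapper ordering files by a snapshot of lastModified()
    static class AgedFile implements Comparable<AgedFile> {
        final File file;
        final long modified;

        AgedFile(File file) {
            this.file = file;
            this.modified = file.lastModified();
        }

        @Override
        public int compareTo(AgedFile other) {
            return Long.compare(this.modified, other.modified);
        }
    }

    /** Deletes the oldest files until the directory's total size drops to maxBytes or fewer. */
    static void trim(File directory, long maxBytes) {
        File[] files = directory.listFiles();
        if (files == null) {
            return; // not a directory, or I/O error
        }
        PriorityQueue<AgedFile> oldestFirst = new PriorityQueue<AgedFile>();
        long totalSize = 0;
        for (File file : files) {
            oldestFirst.add(new AgedFile(file));
            totalSize += file.length();
        }
        while (totalSize > maxBytes && !oldestFirst.isEmpty()) {
            File victim = oldestFirst.remove().file; // oldest remaining file
            totalSize -= victim.length();
            victim.delete();
        }
    }
}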
From source file: com.joliciel.talismane.tokeniser.patterns.IntervalPatternTokeniser.java

@Override
public List<TokenisedAtomicTokenSequence> tokeniseWithDecisions(Sentence sentence) {
    MONITOR.startTask("tokeniseWithDecisions");
    try {
        // apply any pre-tokenisation decisions via filters
        // we only want one placeholder per start index - the first one that gets added
        Map<Integer, TokenPlaceholder> placeholderMap = new HashMap<Integer, TokenPlaceholder>();
        for (TokenFilter tokenFilter : this.tokenFilters) {
            Set<TokenPlaceholder> myPlaceholders = tokenFilter.apply(sentence.getText());
            for (TokenPlaceholder placeholder : myPlaceholders) {
                if (!placeholderMap.containsKey(placeholder.getStartIndex())) {
                    placeholderMap.put(placeholder.getStartIndex(), placeholder);
                }
            }
            if (LOG.isTraceEnabled()) {
                if (myPlaceholders.size() > 0) {
                    LOG.trace("TokenFilter: " + tokenFilter);
                    LOG.trace("placeholders: " + myPlaceholders);
                }
            }
        }
        Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(placeholderMap.values());

        // Initially, separate the sentence into tokens using the separators provided
        TokenSequence tokenSequence = this.tokeniserService.getTokenSequence(sentence, Tokeniser.SEPARATORS,
                placeholders);

        // apply any pre-processing filters that have been added
        for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
            tokenSequenceFilter.apply(tokenSequence);
        }

        // Assign each separator its default value
        List<TokeniserOutcome> defaultOutcomes = this.tokeniserPatternManager.getDefaultOutcomes(tokenSequence);
        List<Decision<TokeniserOutcome>> defaultDecisions = new ArrayList<Decision<TokeniserOutcome>>(
                defaultOutcomes.size());
        for (TokeniserOutcome outcome : defaultOutcomes) {
            Decision<TokeniserOutcome> tokeniserDecision = this.tokeniserDecisionFactory
                    .createDefaultDecision(outcome);
            tokeniserDecision.addAuthority("_" + this.getClass().getSimpleName());
            tokeniserDecision.addAuthority("_" + "DefaultDecision");
            defaultDecisions.add(tokeniserDecision);
        }

        List<TokenisedAtomicTokenSequence> sequences = null;

        // For each test pattern, see if anything in the sentence matches it
        if (this.decisionMaker != null) {
            Set<Token> tokensToCheck = new HashSet<Token>();
            MONITOR.startTask("pattern matching");
            try {
                for (TokenPattern parsedPattern : this.getTokeniserPatternManager().getParsedTestPatterns()) {
                    Set<Token> tokensToCheckForThisPattern = new HashSet<Token>();
                    List<TokenPatternMatchSequence> matchesForThisPattern = parsedPattern.match(tokenSequence);
                    for (TokenPatternMatchSequence tokenPatternMatch : matchesForThisPattern) {
                        if (LOG.isTraceEnabled())
                            tokensToCheckForThisPattern.addAll(tokenPatternMatch.getTokensToCheck());
                        tokensToCheck.addAll(tokenPatternMatch.getTokensToCheck());
                    }
                    if (LOG.isTraceEnabled()) {
                        if (tokensToCheckForThisPattern.size() > 0) {
                            LOG.trace("Parsed pattern: " + parsedPattern);
                            LOG.trace("tokensToCheck: " + tokensToCheckForThisPattern);
                        }
                    }
                }
            } finally {
                MONITOR.endTask("pattern matching");
            }

            // we want to create the n most likely token sequences
            // the sequence has to correspond to a token pattern

            // initially create a heap with a single, empty sequence
            PriorityQueue<TokenisedAtomicTokenSequence> heap = new PriorityQueue<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence emptySequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            heap.add(emptySequence);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Token : \"" + token.getText() + "\"");
                }

                // build a new heap for this iteration
                PriorityQueue<TokenisedAtomicTokenSequence> previousHeap = heap;
                heap = new PriorityQueue<TokenisedAtomicTokenSequence>();

                // limit the heap breadth to K
                int maxSequences = previousHeap.size() > this.getBeamWidth() ? this.getBeamWidth()
                        : previousHeap.size();

                for (int j = 0; j < maxSequences; j++) {
                    TokenisedAtomicTokenSequence history = previousHeap.poll();

                    // Find the separating & non-separating decisions
                    List<Decision<TokeniserOutcome>> decisions = null;
                    if (tokensToCheck.contains(token)) {
                        // test the features on the current token
                        TokeniserContext context = new TokeniserContext(token, history);
                        List<FeatureResult<?>> tokenFeatureResults = new ArrayList<FeatureResult<?>>();
                        MONITOR.startTask("analyse features");
                        try {
                            for (TokeniserContextFeature<?> feature : tokeniserContextFeatures) {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(context, env);
                                if (featureResult != null) {
                                    tokenFeatureResults.add(featureResult);
                                }
                            }
                            if (LOG.isTraceEnabled()) {
                                for (FeatureResult<?> featureResult : tokenFeatureResults) {
                                    LOG.trace(featureResult.toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("analyse features");
                        }

                        MONITOR.startTask("make decision");
                        try {
                            decisions = this.decisionMaker.decide(tokenFeatureResults);

                            for (ClassificationObserver<TokeniserOutcome> observer : this.observers)
                                observer.onAnalyse(token, tokenFeatureResults, decisions);

                            for (Decision<TokeniserOutcome> decision : decisions) {
                                decision.addAuthority(this.getClass().getSimpleName());
                                for (TokenPatternMatch tokenMatch : token.getMatches()) {
                                    decision.addAuthority(tokenMatch.getPattern().toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("make decision");
                        }
                    } else {
                        decisions = new ArrayList<Decision<TokeniserOutcome>>();
                        decisions.add(defaultDecisions.get(i));
                    }

                    MONITOR.startTask("heap sort");
                    try {
                        for (Decision<TokeniserOutcome> decision : decisions) {
                            TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                                    decision);

                            TokenisedAtomicTokenSequence tokenisedSequence = this.getTokeniserService()
                                    .getTokenisedAtomicTokenSequence(history);
                            tokenisedSequence.add(taggedToken);
                            if (decision.isStatistical())
                                tokenisedSequence.addDecision(decision);
                            heap.add(tokenisedSequence);
                        }
                    } finally {
                        MONITOR.endTask("heap sort");
                    }
                } // next sequence in the old heap

                i++;
            } // next token

            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            i = 0;
            while (!heap.isEmpty()) {
                sequences.add(heap.poll());
                i++;
                if (i >= this.getBeamWidth())
                    break;
            }
        } else {
            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                        defaultDecisions.get(i++));
                defaultSequence.add(taggedToken);
            }
            sequences.add(defaultSequence);
        } // have decision maker?

        LOG.debug("####Final token sequences:");
        int j = 1;
        for (TokenisedAtomicTokenSequence sequence : sequences) {
            TokenSequence newTokenSequence = sequence.inferTokenSequence();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Token sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Atomic sequence: " + sequence);
                LOG.debug("Resulting sequence: " + newTokenSequence);
            }
            // need to re-apply the pre-processing filters, because the tokens are all new
            // Question: why can't we conserve the initial tokens when they haven't changed at all?
            // Answer: because the tokenSequence and index in the sequence is referenced by the token.
            // Question: should we create a separate class, Token and TokenInSequence,
            // one with index & sequence access & one without?
            for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
                tokenSequenceFilter.apply(newTokenSequence);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("After filters: " + newTokenSequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tokeniseWithDecisions");
    }
}
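The tokeniser above uses two priority queues as beams: at each token it polls the k best partial sequences from the previous heap, extends each with every candidate decision, and pushes the extensions into a fresh heap. A schematic sketch of one such beam step over a hypothetical scored-hypothesis type (names and the candidate-scoring scheme are illustrative, not Talismane's API):

import java.util.Map;
import java.util.PriorityQueue;

public class BeamStepSketch {

    // Hypothetical hypothesis ordered so the highest-scoring one comes out of poll() first
    static class Hypothesis implements Comparable<Hypothesis> {
        final String tokens;
        final double score;

        Hypothesis(String tokens, double score) {
            this.tokens = tokens;
            this.score = score;
        }

        Hypothesis extend(String token, double tokenScore) {
            return new Hypothesis(tokens + token, score + tokenScore);
        }

        @Override
        public int compareTo(Hypothesis other) {
            // reversed comparison: larger scores first
            return Double.compare(other.score, this.score);
        }
    }

    /** One beam-search step: keep at most beamWidth hypotheses and extend each with every candidate. */
    static PriorityQueue<Hypothesis> beamStep(PriorityQueue<Hypothesis> previous,
            Map<String, Double> scoredCandidates, int beamWidth) {
        PriorityQueue<Hypothesis> next = new PriorityQueue<Hypothesis>();
        int kept = Math.min(previous.size(), beamWidth);
        for (int i = 0; i < kept; i++) {
            Hypothesis history = previous.poll(); // best remaining hypothesis
            for (Map.Entry<String, Double> candidate : scoredCandidates.entrySet()) {
                next.add(history.extend(candidate.getKey(), candidate.getValue()));
            }
        }
        return next; // hypotheses beyond the beam width are simply dropped
    }
}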