List of usage examples for java.util.TreeMap.pollFirstEntry()
public Map.Entry<K, V> pollFirstEntry()
Removes and returns a key-value mapping associated with the least key in this map, or null if the map is empty.
From source file:Main.java
public static void main(String[] args) {
    TreeMap<Integer, String> treemap = new TreeMap<Integer, String>();

    // populating tree map
    treemap.put(2, "two");
    treemap.put(1, "one");
    treemap.put(3, "three");
    treemap.put(6, "six");
    treemap.put(5, "five");

    // polling first entry
    System.out.println("Value before poll: " + treemap);
    System.out.println("Value returned: " + treemap.pollFirstEntry());
    System.out.println("Value after poll: " + treemap);
}
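Since a TreeMap keeps its keys in ascending order, the example prints {1=one, 2=two, 3=three, 5=five, 6=six} before the poll, returns 1=one, and leaves the remaining four entries behind. pollFirstEntry() also returns null once the map is empty (unlike firstKey(), which throws NoSuchElementException), which makes it convenient for destructively draining a map in ascending key order; a minimal sketch:

import java.util.Map;
import java.util.TreeMap;

public class DrainExample {
    public static void main(String[] args) {
        TreeMap<Integer, String> map = new TreeMap<>();
        map.put(2, "two");
        map.put(1, "one");
        map.put(3, "three");

        // Each call removes and returns the entry with the least key, so this
        // visits entries in ascending key order and leaves the map empty.
        Map.Entry<Integer, String> e;
        while ((e = map.pollFirstEntry()) != null) {
            System.out.println(e.getKey() + " -> " + e.getValue());
        }
    }
}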
From source file:cosmos.example.BuildingPermitsExample.java
public static void main(String[] args) throws Exception {
    BuildingPermitsExample example = new BuildingPermitsExample();
    new JCommander(example, args);

    File inputFile = new File(example.fileName);

    Preconditions.checkArgument(inputFile.exists() && inputFile.isFile() && inputFile.canRead(),
            "Expected " + example.fileName + " to be a readable file");

    String zookeepers;
    String instanceName;
    Connector connector;
    MiniAccumuloCluster mac = null;
    File macDir = null;

    // Use the MiniAccumuloCluster if requested
    if (example.useMiniAccumuloCluster) {
        macDir = Files.createTempDir();
        String password = "password";
        MiniAccumuloConfig config = new MiniAccumuloConfig(macDir, password);
        config.setNumTservers(1);

        mac = new MiniAccumuloCluster(config);
        mac.start();

        zookeepers = mac.getZooKeepers();
        instanceName = mac.getInstanceName();

        ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zookeepers);
        connector = instance.getConnector("root", new PasswordToken(password));
    } else {
        // Otherwise connect to a running instance
        zookeepers = example.zookeepers;
        instanceName = example.instanceName;

        ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zookeepers);
        connector = instance.getConnector(example.username, new PasswordToken(example.password));
    }

    // Instantiate an instance of Cosmos
    Cosmos cosmos = new CosmosImpl(zookeepers);

    // Create a definition for the data we want to load
    Store id = Store.create(connector, new Authorizations(), AscendingIndexIdentitySet.create());

    // Register the definition with Cosmos so it can track its progress.
    cosmos.register(id);

    // Load all of the data from our inputFile
    LoadBuildingPermits loader = new LoadBuildingPermits(cosmos, id, inputFile);
    loader.run();

    // Finalize the SortableResult which will prevent future writes to the data set
    cosmos.finalize(id);

    // Flush the ingest traces to the backend so we can see the results
    id.sendTraces();

    // Get back the Set of Columns that we've ingested.
    Set<Column> schema = Sets.newHashSet(cosmos.columns(id));

    log.debug("\nColumns: " + schema);

    Iterator<Column> iter = schema.iterator();
    while (iter.hasNext()) {
        Column c = iter.next();

        // Remove the internal ID field and columns that begin with CONTRACTOR_
        if (c.equals(LoadBuildingPermits.ID) || c.name().startsWith("CONTRACTOR_")) {
            iter.remove();
        }
    }

    Iterable<Index> indices = Iterables.transform(schema, new Function<Column, Index>() {
        @Override
        public Index apply(Column col) {
            return Index.define(col);
        }
    });

    // Ensure that we have locality groups set as we expect
    log.info("Ensure locality groups are set");
    id.optimizeIndices(indices);

    // Compact down the data for this SortableResult
    log.info("Issuing compaction for relevant data");
    id.consolidate();

    final int numTopValues = 10;

    // Walk through each column in the result set
    for (Column c : schema) {
        Stopwatch sw = new Stopwatch();
        sw.start();

        // Get the number of times we've seen each value in a given column
        CloseableIterable<Entry<RecordValue<?>, Long>> groupingsInColumn = cosmos.groupResults(id, c);

        log.info(c.name() + ":");

        // Iterate over the counts, collecting the top N values in each column
        TreeMap<Long, RecordValue<?>> topValues = Maps.newTreeMap();

        for (Entry<RecordValue<?>, Long> entry : groupingsInColumn) {
            if (topValues.size() == numTopValues) {
                Entry<Long, RecordValue<?>> least = topValues.pollFirstEntry();

                if (least.getKey() < entry.getValue()) {
                    topValues.put(entry.getValue(), entry.getKey());
                } else {
                    topValues.put(least.getKey(), least.getValue());
                }
            } else if (topValues.size() < numTopValues) {
                topValues.put(entry.getValue(), entry.getKey());
            }
        }

        for (Long key : topValues.descendingKeySet()) {
            log.info(topValues.get(key).value() + " occurred " + key + " times");
        }

        sw.stop();

        log.info("Took " + sw.toString() + " to run query.\n");
    }

    log.info("Deleting records");

    // Delete the records we've ingested
    if (!example.useMiniAccumuloCluster) {
        // Because I'm lazy and don't want to wait around to run the BatchDeleter when we're just going
        // to rm -rf the directory in a few secs.
        cosmos.delete(id);
    }

    // And shut down Cosmos
    cosmos.close();

    log.info("Cosmos stopped");

    // If we were using MAC, also stop that
    if (example.useMiniAccumuloCluster && null != mac) {
        mac.stop();
        if (null != macDir) {
            FileUtils.deleteDirectory(macDir);
        }
    }
}
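The pattern above, a TreeMap capped at N entries where pollFirstEntry() evicts the smallest key whenever a larger one arrives, is a common way to track top-N counts in a single pass. A minimal self-contained sketch of the same idea (the names here are illustrative, not part of the Cosmos API):

import java.util.Map;
import java.util.TreeMap;

public class TopN {
    /** Keep only the n largest counts seen so far. */
    static void offer(TreeMap<Long, String> top, int n, long count, String value) {
        if (top.size() < n) {
            top.put(count, value);
        } else if (top.firstKey() < count) {
            top.pollFirstEntry();      // evict the current smallest count
            top.put(count, value);
        }
    }

    public static void main(String[] args) {
        TreeMap<Long, String> top = new TreeMap<>();
        long[] counts = { 7, 42, 3, 19, 5 };
        for (long c : counts) {
            offer(top, 3, c, "value-" + c);
        }
        // Entries come out smallest-first; descendingMap() would give largest-first.
        for (Map.Entry<Long, String> e : top.entrySet()) {
            System.out.println(e);
        }
    }
}

One caveat, which applies to the example above as well: equal counts collide on the same key, since a TreeMap holds one value per key, so production code typically keys on a composite of (count, value) or tolerates the overwrite.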
From source file:ws.project.languagebasedlexiconanalisys.LexiconProcessor.java
public void writeTimeSeries(TreeMap<String, HashSet<String>> terms, String[] u)
        throws FileNotFoundException, UnsupportedEncodingException, IOException {
    PrintWriter writer = new PrintWriter(new File("time_series.csv"), "UTF-8");
    writer.write("term;");
    for (String d : u) {
        writer.write(d + ";");
    }
    writer.write("\n");
    // Write at most 100 terms in ascending key order, stopping early once the map is empty
    for (int i = 0; i < 100 && !terms.isEmpty(); i++) {
        Entry<String, HashSet<String>> e = terms.pollFirstEntry();
        writer.write(e.getKey() + ";");
        for (String d : u) {
            if (e.getValue().contains(d)) {
                indexer.openReader(d);
                writer.write(indexer.getReader().docFreq(new Term("tweet", e.getKey())) + ";");
                indexer.closeReader();
            } else {
                writer.write("0;");
            }
        }
        writer.write("\n");
    }
    writer.close();
}
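Note that pollFirstEntry() mutates the map, so this method consumes up to 100 entries of the caller's terms map as a side effect (the loop condition also guards against polling an empty map, which would return null). If the caller still needs the map afterwards, hand in a defensive copy; a sketch, with hypothetical variables termsByWord and dateColumns:

processor.writeTimeSeries(new TreeMap<>(termsByWord), dateColumns);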
From source file:org.apache.oozie.service.ZKXLogStreamingService.java
/**
 * Contacts each of the other Oozie servers, gets their logs for the job, collates them, and sends them to the user via the
 * Writer. It will make sure to not read all of the log messages into memory at the same time to not use up the heap. If there
 * is a problem talking to one of the other servers, it will ignore that server and prepend a message to the Writer about it.
 * For getting the logs from this server, it won't use the REST API and instead get them directly to be more efficient.
 *
 * @param logStreamer the XLogStreamer
 * @param startTime the job start time
 * @param endTime the job end time
 * @param writer the writer
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void collateLogs(XLogStreamer logStreamer, Date startTime, Date endTime, Writer writer) throws IOException {
    List<String> badOozies = new ArrayList<String>();
    List<ServiceInstance<Map>> oozies = null;
    try {
        oozies = zk.getAllMetaData();
    } catch (Exception ex) {
        throw new IOException("Issue communicating with ZooKeeper: " + ex.getMessage(), ex);
    }
    List<TimestampedMessageParser> parsers = new ArrayList<TimestampedMessageParser>(oozies.size());
    try {
        // Create a BufferedReader for getting the logs of each server and put them in a TimestampedMessageParser
        for (ServiceInstance<Map> oozie : oozies) {
            Map<String, String> oozieMeta = oozie.getPayload();
            String otherId = oozieMeta.get(ZKUtils.ZKMetadataKeys.OOZIE_ID);
            // If it's this server, we can just get them directly
            if (otherId.equals(zk.getZKId())) {
                BufferedReader reader = logStreamer.makeReader(startTime, endTime);
                parsers.add(new TimestampedMessageParser(reader, logStreamer.getXLogFilter()));
            }
            // If it's another server, we'll have to use the REST API
            else {
                String otherUrl = oozieMeta.get(ZKUtils.ZKMetadataKeys.OOZIE_URL);
                String jobId = logStreamer.getXLogFilter().getFilterParams().get(DagXLogInfoService.JOB);
                try {
                    // It's important that we specify ALL_SERVERS_PARAM=false in the GET request to prevent the other Oozie
                    // Server from trying to aggregate logs from the other Oozie servers (and creating an infinite recursion)
                    final String url = otherUrl + "/v" + OozieClient.WS_PROTOCOL_VERSION + "/" + RestConstants.JOB
                            + "/" + jobId + "?" + RestConstants.JOB_SHOW_PARAM + "=" + logStreamer.getLogType()
                            + "&" + RestConstants.ALL_SERVER_REQUEST + "=false"
                            + AuthUrlClient.getQueryParamString(logStreamer.getRequestParam());
                    // remove doAs from url to avoid failure while fetching
                    // logs in case of HA mode
                    String key = "doAs";
                    String[] value = null;
                    if (logStreamer.getRequestParam() != null) {
                        value = logStreamer.getRequestParam().get(key);
                    }
                    String urlWithoutdoAs = null;
                    if (value != null && value.length > 0 && value[0] != null && value[0].length() > 0) {
                        urlWithoutdoAs = url.replace("&" + key + "=" + URLEncoder.encode(value[0], "UTF-8"), "");
                    } else {
                        urlWithoutdoAs = url;
                    }
                    BufferedReader reader = AuthUrlClient.callServer(urlWithoutdoAs);
                    parsers.add(new SimpleTimestampedMessageParser(reader, logStreamer.getXLogFilter()));
                } catch (IOException ioe) {
                    log.warn("Failed to retrieve logs for job [" + jobId + "] from Oozie server with ID [" + otherId
                            + "] at [" + otherUrl + "]; log information may be incomplete", ioe);
                    badOozies.add(otherId);
                }
            }
        }

        if (!StringUtils.isEmpty(logStreamer.getXLogFilter().getTruncatedMessage())) {
            writer.write(logStreamer.getXLogFilter().getTruncatedMessage());
        }
        // If log param debug is set, we need to write start date and end date to the output stream.
        if (logStreamer.getXLogFilter().isDebugMode()) {
            writer.write(logStreamer.getXLogFilter().getDebugMessage());
        }
        // Add a message about any servers we couldn't contact
        if (!badOozies.isEmpty()) {
            writer.write("Unable to contact the following Oozie Servers for logs (log information may be incomplete):\n");
            for (String badOozie : badOozies) {
                writer.write(" ");
                writer.write(badOozie);
                writer.write("\n");
            }
            writer.write("\n");
            writer.flush();
        }

        // If it's just the one server (this server), then we don't need to do any more processing and can just copy it directly
        if (parsers.size() == 1) {
            TimestampedMessageParser parser = parsers.get(0);
            parser.processRemaining(writer, logStreamer);
        } else {
            // Now that we have a Reader for each server to get the logs from that server, we have to collate them. Within each
            // server, the logs should already be in the correct order, so we can take advantage of that. We'll use the
            // BufferedReaders to read the messages from the logs of each server and put them in order without having to bring
            // every message into memory at the same time.
            TreeMap<String, TimestampedMessageParser> timestampMap = new TreeMap<String, TimestampedMessageParser>();
            // populate timestampMap with initial values
            for (TimestampedMessageParser parser : parsers) {
                if (parser.increment()) {
                    timestampMap.put(parser.getLastTimestamp(), parser);
                }
            }
            while (timestampMap.size() > 1) {
                // The first entry will be the earliest based on the timestamp (pollFirstEntry also removes it from the map)
                TimestampedMessageParser earliestParser = timestampMap.pollFirstEntry().getValue();
                // Write the message from that parser at that timestamp
                writer.write(earliestParser.getLastMessage());
                if (logStreamer.shouldFlushOutput(earliestParser.getLastMessage().length())) {
                    writer.flush();
                }
                // Increment that parser to read the next message
                if (earliestParser.increment()) {
                    // If it still has messages left, put it back in the map with the new last timestamp for it
                    timestampMap.put(earliestParser.getLastTimestamp(), earliestParser);
                }
            }
            // If there's only one parser left in the map, then we can simply copy the rest of its lines directly to be faster
            if (timestampMap.size() == 1) {
                TimestampedMessageParser parser = timestampMap.values().iterator().next();
                writer.write(parser.getLastMessage()); // don't forget the last message read by the parser
                parser.processRemaining(writer, logStreamer);
            }
        }
    } finally {
        for (TimestampedMessageParser parser : parsers) {
            parser.closeReader();
        }
    }
}
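The TreeMap here implements a k-way merge: each sorted source contributes its current timestamp as a key, pollFirstEntry() yields the globally earliest one, and the source is re-inserted under its next timestamp. A minimal sketch of the same technique over plain sorted iterators (note that two sources presenting an identical key would collide in the TreeMap, so real code needs a tiebreaker, for example appending a source index to the key):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class KWayMerge {
    public static void main(String[] args) {
        List<Iterator<String>> sources = Arrays.asList(
                Arrays.asList("2024-01-01 a", "2024-01-03 c").iterator(),
                Arrays.asList("2024-01-02 b", "2024-01-04 d").iterator());

        // Key each source by its current (smallest remaining) line.
        TreeMap<String, Iterator<String>> frontier = new TreeMap<>();
        for (Iterator<String> src : sources) {
            if (src.hasNext()) {
                frontier.put(src.next(), src);
            }
        }
        // Repeatedly emit the globally smallest line and advance its source.
        while (!frontier.isEmpty()) {
            Map.Entry<String, Iterator<String>> least = frontier.pollFirstEntry();
            System.out.println(least.getKey());
            Iterator<String> src = least.getValue();
            if (src.hasNext()) {
                frontier.put(src.next(), src);
            }
        }
    }
}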
From source file:org.apdplat.superword.tools.TextAnalyzer.java
/**
 * Scores sentences by the rarity of their words and keeps the top (or bottom) scoring ones.
 *
 * @param path   directory containing the text files to analyse
 * @param limit  maximum number of sentences to keep
 * @param isTopN true to keep the highest-scoring sentences, false to keep the lowest-scoring ones
 * @return sentences keyed by score
 */
public static TreeMap<Float, String> sentence(String path, int limit, boolean isTopN) {
    // collect the files to analyse
    Set<String> fileNames = getFileNames(path);
    // word frequencies across all files
    Map<String, AtomicInteger> frequency = frequency(fileNames);
    // sentences keyed by score
    TreeMap<Float, String> sentences = new TreeMap<>();
    int count = 0;
    for (String fileName : fileNames) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new BufferedInputStream(new FileInputStream(fileName))))) {
            String line = null;
            while ((line = reader.readLine()) != null) {
                if (StringUtils.isBlank(line)) {
                    continue;
                }
                // score a sentence by summing the inverse frequency of each of its words
                float score = 0;
                List<String> words = seg(line);
                for (String word : words) {
                    AtomicInteger fre = frequency.get(word);
                    if (fre == null || fre.get() == 0) {
                        LOGGER.error("Word frequency not found for line: " + line);
                        score = 0;
                        break;
                    }
                    score += 1 / (float) fre.get();
                }
                words.clear();
                if (score > 0) {
                    // skip sentences whose score collides with one already stored
                    if (sentences.get(score) != null) {
                        continue;
                    }
                    sentences.put(score, line + " <u><i>"
                            + Paths.get(fileName).toFile().getName().replace(".txt", "") + "</i></u>");
                    count++;
                    if (count >= limit) {
                        if (isTopN) {
                            // keep the top N: evict the lowest-scoring sentence
                            sentences.pollFirstEntry();
                        } else {
                            // keep the bottom N: evict the highest-scoring sentence
                            sentences.pollLastEntry();
                        }
                    }
                }
            }
        } catch (IOException ex) {
            LOGGER.error("Failed to read file: " + fileName, ex);
        }
    }
    return sentences;
}
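This shows the two polar eviction strategies: once the map is over its limit, pollFirstEntry() drops the smallest key (keeping a top-N set) while pollLastEntry() drops the largest (keeping a bottom-N set). A minimal sketch of a capped insert, with hypothetical names:

// Insert 'score -> sentence', keeping at most 'limit' entries.
// keepHighest == true  -> evict the smallest score (top-N behaviour)
// keepHighest == false -> evict the largest score  (bottom-N behaviour)
static void cappedPut(TreeMap<Float, String> map, int limit,
                      float score, String sentence, boolean keepHighest) {
    map.put(score, sentence);
    if (map.size() > limit) {
        if (keepHighest) {
            map.pollFirstEntry();
        } else {
            map.pollLastEntry();
        }
    }
}

As in the method above, equal scores collide, since a TreeMap holds one value per key; that is why the original code skips a sentence whose score is already present.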
From source file:ch.unil.genescore.pathway.GeneSetLibrary.java
/** get a weighted random sample; implementing Efraimidis et al. 2006 */
double[] getWeightedRandomSample(int setLength, double[] totSet, double[] totWeights) {
    double[] draws = new double[totWeights.length];
    int[] ranks = new int[totWeights.length];
    double[] out = new double[setLength];

    TreeMap<Double, Integer> myRankTree = new TreeMap<Double, Integer>();
    int treesize = 0;
    for (int i = 0; i < totWeights.length; i++) {
        draws[i] = Math.log(rand.nextDouble()) / totWeights[i];
        if (treesize < setLength) {
            treesize++;
            myRankTree.put(draws[i], i);
        } else if (treesize == setLength && myRankTree.firstKey() < draws[i]) {
            myRankTree.pollFirstEntry();
            myRankTree.put(draws[i], i);
        }
    }
    Iterator<Entry<Double, Integer>> it = myRankTree.entrySet().iterator();
    Entry<Double, Integer> ent;
    int count = 0;
    while (it.hasNext()) {
        ent = it.next();
        ranks[count] = ent.getValue();
        count++;
    }
    for (int i = 0; i < setLength; i++) {
        out[i] = totSet[ranks[i]];
    }
    return out;
}
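The method keeps the setLength items with the largest keys log(u)/w (u uniform in (0,1)), which is the weighted reservoir sampling scheme of Efraimidis and Spirtes (2006): an item's chance of surviving in the reservoir is proportional to its weight, and pollFirstEntry() evicts the current worst key whenever a better draw arrives. A standalone sketch of the same scheme, with illustrative names (as in the original, ties on the random key are vanishingly unlikely but would collide):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;

public class WeightedSample {
    /** Pick k indices without replacement, with probability proportional to weights[i]. */
    static List<Integer> sample(double[] weights, int k, Random rand) {
        TreeMap<Double, Integer> reservoir = new TreeMap<>();
        for (int i = 0; i < weights.length; i++) {
            // Larger weight -> key closer to zero -> more likely to stay in the reservoir.
            double key = Math.log(rand.nextDouble()) / weights[i];
            if (reservoir.size() < k) {
                reservoir.put(key, i);
            } else if (reservoir.firstKey() < key) {
                reservoir.pollFirstEntry();   // evict the current smallest key
                reservoir.put(key, i);
            }
        }
        return new ArrayList<>(reservoir.values());
    }

    public static void main(String[] args) {
        System.out.println(sample(new double[] { 1, 2, 4, 8 }, 2, new Random()));
    }
}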
From source file:com.joliciel.jochre.analyser.BeamSearchImageAnalyser.java
public void analyseInternal(JochreImage image) {
    LOG.debug("Analysing image " + image.getId());
    if (currentMonitor != null) {
        currentMonitor.setCurrentAction("imageMonitor.analysingImage",
                new Object[] { image.getPage().getIndex() });
    }
    for (LetterGuessObserver observer : observers) {
        observer.onImageStart(image);
    }
    if (totalShapeCount < 0)
        totalShapeCount = image.getShapeCount();

    for (Paragraph paragraph : image.getParagraphs()) {
        LOG.debug("Analysing paragraph " + paragraph.getIndex() + " (id=" + paragraph.getId() + ")");
        List<LetterSequence> holdoverSequences = null;
        for (RowOfShapes row : paragraph.getRows()) {
            LOG.debug("Analysing row " + row.getIndex() + " (id=" + row.getId() + ")");
            for (GroupOfShapes group : row.getGroups()) {
                if (group.isSkip()) {
                    LOG.debug("Skipping group " + group.getIndex() + " (id=" + group.getId() + ")");
                    continue;
                }
                LOG.debug("Analysing group " + group.getIndex() + " (id=" + group.getId() + ")");

                int width = group.getRight() - group.getLeft() + 1;

                List<ShapeSequence> shapeSequences = null;
                if (boundaryDetector != null) {
                    shapeSequences = boundaryDetector.findBoundaries(group);
                } else {
                    // simply add this group's shapes
                    shapeSequences = new ArrayList<ShapeSequence>();
                    ShapeSequence shapeSequence = boundaryService.getEmptyShapeSequence();
                    for (Shape shape : group.getShapes())
                        shapeSequence.addShape(shape);
                    shapeSequences.add(shapeSequence);
                }

                // Perform a beam search to guess the most likely sequence for this word
                TreeMap<Integer, PriorityQueue<LetterSequence>> heaps = new TreeMap<Integer, PriorityQueue<LetterSequence>>();

                // prime a starter heap with the n best shape boundary analyses for this group
                PriorityQueue<LetterSequence> starterHeap = new PriorityQueue<LetterSequence>(1);
                for (ShapeSequence shapeSequence : shapeSequences) {
                    LetterSequence emptySequence = this.getLetterGuesserService().getEmptyLetterSequence(shapeSequence);
                    starterHeap.add(emptySequence);
                }
                heaps.put(0, starterHeap);

                PriorityQueue<LetterSequence> finalHeap = null;
                while (heaps.size() > 0) {
                    Entry<Integer, PriorityQueue<LetterSequence>> heapEntry = heaps.pollFirstEntry();
                    if (LOG.isTraceEnabled())
                        LOG.trace("heap for index: " + heapEntry.getKey().intValue() + ", width: " + width);
                    if (heapEntry.getKey().intValue() == width) {
                        finalHeap = heapEntry.getValue();
                        break;
                    }
                    PriorityQueue<LetterSequence> previousHeap = heapEntry.getValue();

                    // limit the breadth to K
                    int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();

                    for (int j = 0; j < maxSequences; j++) {
                        LetterSequence history = previousHeap.poll();
                        ShapeInSequence shapeInSequence = history.getNextShape();
                        Shape shape = shapeInSequence.getShape();
                        if (LOG.isTraceEnabled()) {
                            LOG.trace("Sequence " + history + ", shape: " + shape);
                        }
                        LogUtils.logMemory(LOG);
                        int position = 0;
                        if (Linguistics.getInstance(image.getPage().getDocument().getLocale()).isLeftToRight()) {
                            position = shape.getRight() - group.getLeft() + 1;
                        } else {
                            position = group.getRight() - shape.getLeft() + 1;
                        }
                        PriorityQueue<LetterSequence> heap = heaps.get(position);
                        if (heap == null) {
                            heap = new PriorityQueue<LetterSequence>();
                            heaps.put(position, heap);
                        }

                        MONITOR.startTask("guess letter");
                        try {
                            letterGuesser.guessLetter(shapeInSequence, history);
                        } finally {
                            MONITOR.endTask();
                        }

                        MONITOR.startTask("heap sort");
                        try {
                            for (Decision<Letter> letterGuess : shape.getLetterGuesses()) {
                                // leave out very low probability outcomes
                                if (letterGuess.getProbability() > this.minOutcomeWeight) {
                                    LetterSequence sequence = this.getLetterGuesserService().getLetterSequencePlusOne(history);
                                    sequence.add(letterGuess.getOutcome());
                                    sequence.addDecision(letterGuess);
                                    heap.add(sequence);
                                } // weight big enough to include
                            } // next letter guess for this shape
                        } finally {
                            MONITOR.endTask();
                        }
                    } // next history in heap
                } // any more heaps?

                LetterSequence bestSequence = null;
                boolean shouldCombineWithHoldover = false;
                boolean isHoldover = false;
                MONITOR.startTask("best sequence");
                try {
                    List<LetterSequence> finalSequences = new ArrayList<LetterSequence>();
                    for (int i = 0; i < this.beamWidth; i++) {
                        if (finalHeap.isEmpty())
                            break;
                        finalSequences.add(finalHeap.poll());
                    }

                    if (this.getMostLikelyWordChooser() == null) {
                        // most likely sequence is on top of the last heap
                        bestSequence = finalSequences.get(0);
                    } else {
                        // get most likely sequence using lexicon
                        if (holdoverSequences != null) {
                            // we have a holdover from the previous row ending with a dash
                            bestSequence = this.getMostLikelyWordChooser().chooseMostLikelyWord(finalSequences,
                                    holdoverSequences, this.beamWidth);
                            shouldCombineWithHoldover = true;
                        } else {
                            // check if this is the last group on the row and could end with a dash
                            boolean shouldBeHeldOver = false;
                            if (group.getIndex() == row.getGroups().size() - 1
                                    && row.getIndex() < paragraph.getRows().size() - 1) {
                                for (LetterSequence letterSequence : finalSequences) {
                                    if (letterSequence.toString().endsWith("-")) {
                                        shouldBeHeldOver = true;
                                        break;
                                    }
                                }
                            }
                            if (shouldBeHeldOver) {
                                holdoverSequences = finalSequences;
                                isHoldover = true;
                            } else {
                                // simplest case: no holdover
                                bestSequence = this.getMostLikelyWordChooser().chooseMostLikelyWord(finalSequences, this.beamWidth);
                            }
                        } // have we holdover sequences?
                    } // have we a most likely word chooser?
                    if (!isHoldover) {
                        for (LetterGuessObserver observer : observers) {
                            observer.onBeamSearchEnd(bestSequence, finalSequences, holdoverSequences);
                        }
                    }
                } finally {
                    MONITOR.endTask();
                }

                MONITOR.startTask("assign letter");
                try {
                    if (shouldCombineWithHoldover) {
                        holdoverSequences = null;
                    }
                    if (!isHoldover) {
                        for (LetterGuessObserver observer : observers) {
                            observer.onStartSequence(bestSequence);
                        }
                        group.setBestLetterSequence(bestSequence);
                        int i = 0;
                        for (ShapeInSequence shapeInSequence : bestSequence.getUnderlyingShapeSequence()) {
                            String bestOutcome = bestSequence.get(i).getString();
                            this.assignLetter(shapeInSequence, bestOutcome);
                            i++;
                        } // next shape
                        for (LetterGuessObserver observer : observers) {
                            observer.onGuessSequence(bestSequence);
                        }
                    }

                    this.shapeCount += group.getShapes().size();
                    if (this.currentMonitor != null) {
                        double progress = (double) shapeCount / (double) totalShapeCount;
                        LOG.debug("progress: " + progress);
                        currentMonitor.setPercentComplete(progress);
                    }
                } finally {
                    MONITOR.endTask();
                }
            } // next group
        } // next row
    } // next paragraph
    for (LetterGuessObserver observer : observers) {
        observer.onImageEnd();
    }
}
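Here the TreeMap acts as the agenda of a beam search: heaps are keyed by how far across the group each partial hypothesis has advanced, and pollFirstEntry() always expands the least-advanced frontier first, so every sequence reaching the full width has been extended shape by shape. A stripped-down sketch of that agenda structure, with types simplified to strings and integers:

import java.util.Arrays;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.TreeMap;

public class BeamAgenda {
    public static void main(String[] args) {
        int target = 3;    // e.g. total width to cover
        int beamWidth = 2;

        // agenda: position reached -> partial hypotheses at that position
        TreeMap<Integer, PriorityQueue<String>> agenda = new TreeMap<>();
        agenda.put(0, new PriorityQueue<>(Arrays.asList("")));

        while (!agenda.isEmpty()) {
            // always expand the least-advanced frontier first
            Map.Entry<Integer, PriorityQueue<String>> entry = agenda.pollFirstEntry();
            int pos = entry.getKey();
            if (pos == target) {
                System.out.println("Best hypothesis: " + entry.getValue().peek());
                break;
            }
            PriorityQueue<String> heap = entry.getValue();
            // expand at most beamWidth hypotheses from the current frontier
            for (int j = 0; j < beamWidth && !heap.isEmpty(); j++) {
                String hypothesis = heap.poll();
                for (char c : new char[] { 'a', 'b' }) {   // toy "decisions"
                    agenda.computeIfAbsent(pos + 1, k -> new PriorityQueue<>())
                          .add(hypothesis + c);
                }
            }
        }
    }
}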
From source file:com.joliciel.talismane.posTagger.PosTaggerImpl.java
@Override
public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) {
    MONITOR.startTask("tagSentence");
    try {
        MONITOR.startTask("apply filters");
        try {
            for (TokenSequence tokenSequence : tokenSequences) {
                for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) {
                    tokenFilter.apply(tokenSequence);
                }
            }
        } finally {
            MONITOR.endTask("apply filters");
        }

        int sentenceLength = tokenSequences.get(0).getText().length();

        TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>();

        PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>();
        for (TokenSequence tokenSequence : tokenSequences) {
            // add an empty PosTagSequence for each token sequence
            PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0);
            emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(emptySequence);
        }
        heaps.put(0.0, heap0);

        PriorityQueue<PosTagSequence> finalHeap = null;
        while (heaps.size() > 0) {
            Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry();
            if (LOG.isTraceEnabled()) {
                LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength);
            }
            if (heapEntry.getKey() == sentenceLength) {
                finalHeap = heapEntry.getValue();
                break;
            }
            PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue();

            // limit the breadth to K
            int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();

            for (int j = 0; j < maxSequences; j++) {
                PosTagSequence history = previousHeap.poll();
                Token token = history.getNextToken();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString());
                    LOG.trace("Prob: " + df.format(history.getScore()));
                    LOG.trace("Token: " + token.getText());

                    StringBuilder sb = new StringBuilder();
                    for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) {
                        if (oneToken.equals(token))
                            sb.append("[" + oneToken + "]");
                        else
                            sb.append(oneToken);
                    }
                    LOG.trace(sb.toString());
                }

                PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history);
                List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>();

                // test the positive rules on the current token
                boolean ruleApplied = false;
                if (posTaggerPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (PosTaggerRule rule : posTaggerPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.getCondition().getName());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet()
                                        .createDefaultDecision(rule.getTag());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting posTag to: " + rule.getTag().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current token
                    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("analyse features");
                    try {
                        for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) {
                            MONITOR.startTask(posTaggerFeature.getCollectionName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = posTaggerFeature.check(context, env);
                                if (featureResult != null)
                                    featureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(posTaggerFeature.getCollectionName());
                            }
                        }
                        if (LOG.isTraceEnabled()) {
                            for (FeatureResult<?> result : featureResults) {
                                LOG.trace(result.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("analyse features");
                    }

                    // evaluate the feature results using the maxent model
                    MONITOR.startTask("make decision");
                    decisions = this.decisionMaker.decide(featureResults);
                    MONITOR.endTask("make decision");

                    for (ClassificationObserver<PosTag> observer : this.observers) {
                        observer.onAnalyse(token, featureResults, decisions);
                    }

                    // apply the negative rules
                    Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>();
                    if (posTaggerNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (PosTaggerRule rule : posTaggerNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.getCondition().getName());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedPosTags.add(rule.getTag());
                                    if (LOG.isTraceEnabled()) {
                                        LOG.trace("Rule applies. Eliminating posTag: " + rule.getTag().getCode());
                                    }
                                }
                            }
                            if (eliminatedPosTags.size() > 0) {
                                List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                                for (Decision<PosTag> decision : decisions) {
                                    if (!eliminatedPosTags.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }

                    // is this a known word in the lexicon?
                    MONITOR.startTask("apply constraints");
                    try {
                        if (LOG.isTraceEnabled()) {
                            String posTags = "";
                            for (PosTag onePosTag : token.getPossiblePosTags()) {
                                posTags += onePosTag.getCode() + ",";
                            }
                            LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags);
                        }
                        List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                        for (Decision<PosTag> decision : decisions) {
                            if (decision.getProbability() >= MIN_PROB_TO_STORE) {
                                decisionShortList.add(decision);
                            }
                        }
                        if (decisionShortList.size() > 0) {
                            decisions = decisionShortList;
                        }
                    } finally {
                        MONITOR.endTask("apply constraints");
                    }
                } // has a rule been applied?

                // add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt
                MONITOR.startTask("heap sort");
                for (Decision<PosTag> decision : decisions) {
                    if (LOG.isTraceEnabled())
                        LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability());

                    PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token, decision);
                    PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history);
                    sequence.addPosTaggedToken(posTaggedToken);
                    if (decision.isStatistical())
                        sequence.addDecision(decision);

                    double heapIndex = token.getEndIndex();
                    // add another half for an empty token, to differentiate it from regular ones
                    if (token.getStartIndex() == token.getEndIndex())
                        heapIndex += 0.5;

                    // if it's the last token, make sure we end
                    if (token.getIndex() == sequence.getTokenSequence().size() - 1)
                        heapIndex = sentenceLength;

                    if (LOG.isTraceEnabled())
                        LOG.trace("Heap index: " + heapIndex);

                    PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex);
                    if (heap == null) {
                        heap = new PriorityQueue<PosTagSequence>();
                        heaps.put(heapIndex, heap);
                    }
                    heap.add(sequence);
                } // next outcome for this token
                MONITOR.endTask("heap sort");
            } // next history
        } // next atomic index

        // return the best sequences on the heap
        List<PosTagSequence> sequences = new ArrayList<PosTagSequence>();
        int i = 0;
        while (!finalHeap.isEmpty()) {
            sequences.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }

        // apply post-processing filters
        LOG.debug("####Final postag sequences:");
        int j = 1;
        for (PosTagSequence sequence : sequences) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Sequence before filters: " + sequence);
            }
            for (PosTagSequenceFilter filter : this.postProcessingFilters)
                filter.apply(sequence);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence after filters: " + sequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tagSentence");
    }
}
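One detail worth noting in this variant of the agenda pattern: the heap keys are Doubles rather than Integers, so an empty token (start index equal to end index) can be offset by 0.5 and kept distinct from a regular token ending at the same character position, while pollFirstEntry() still visits the heaps in ascending positional order.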
From source file:org.wso2.carbon.governance.registry.extensions.executors.ServiceVersionExecutor.java
private String reformatPath(String path, String currentExpression, String targetExpression,
        String newResourceVersion) throws RegistryException {
    TreeMap<Integer, String> indexMap = new TreeMap<Integer, String>();
    String returnPath = targetExpression;
    String prefix;

    if (currentExpression.equals(targetExpression)) {
        return path;
    }

    indexMap.put(currentExpression.indexOf(RESOURCE_NAME), RESOURCE_NAME);
    indexMap.put(currentExpression.indexOf(RESOURCE_PATH), RESOURCE_PATH);
    indexMap.put(currentExpression.indexOf(RESOURCE_VERSION), RESOURCE_VERSION);

    String tempExpression = currentExpression;
    while (indexMap.lastKey() < tempExpression.lastIndexOf(RegistryConstants.PATH_SEPARATOR)) {
        tempExpression = tempExpression.substring(0, tempExpression.lastIndexOf(RegistryConstants.PATH_SEPARATOR));
        path = path.substring(0, path.lastIndexOf(RegistryConstants.PATH_SEPARATOR));
    }

    prefix = currentExpression.substring(0, currentExpression.indexOf(indexMap.get(indexMap.higherKey(-1))));

    if (!path.startsWith(prefix)) {
        return path;
    }
    path = path.replace(prefix, "");

    // drop placeholders that do not occur in the current expression (indexOf returned -1)
    while (true) {
        if (indexMap.firstKey() < 0) {
            indexMap.pollFirstEntry();
        } else {
            break;
        }
    }

    while (true) {
        if (indexMap.size() == 0) {
            break;
        }
        Map.Entry lastEntry = indexMap.pollLastEntry();
        if (lastEntry.getValue().equals(RESOURCE_PATH)) {
            String pathValue = path;
            for (int i = 0; i < indexMap.size(); i++) {
                // pathValue = formatPath(pathValue.substring(path.indexOf(RegistryConstants.PATH_SEPARATOR)));
                pathValue = formatPath(pathValue.substring(pathValue.indexOf(RegistryConstants.PATH_SEPARATOR)));
            }
            if (!pathValue.equals("")) {
                returnPath = returnPath.replace(RESOURCE_PATH, formatPath(pathValue));
                path = path.replace(pathValue, "");
            } else {
                returnPath = returnPath.replace("/" + lastEntry.getValue(), "");
            }
            continue;
        }
        if (lastEntry.getValue().equals(RESOURCE_VERSION)) {
            returnPath = returnPath.replace(RESOURCE_VERSION, newResourceVersion);
            if (path.contains("/")) {
                path = path.substring(0, path.lastIndexOf(RegistryConstants.PATH_SEPARATOR));
            } else {
                path = "";
            }
            continue;
        }
        String tempPath;
        if (path.contains("/")) {
            tempPath = path.substring(path.lastIndexOf(RegistryConstants.PATH_SEPARATOR) + 1);
        } else {
            tempPath = path;
        }
        if (!tempPath.equals("")) {
            returnPath = returnPath.replace((String) lastEntry.getValue(), formatPath(tempPath));
            if (path.contains("/")) {
                path = path.substring(0, path.lastIndexOf(RegistryConstants.PATH_SEPARATOR));
            } else {
                path = "";
            }
        } else {
            returnPath = returnPath.replace("/" + lastEntry.getValue(), "");
            if (path.contains("/")) {
                path = path.substring(0, path.lastIndexOf(RegistryConstants.PATH_SEPARATOR));
            }
        }
    }

    // Adding the version validation here.
    if (!newResourceVersion.matches("^\\d+[.]\\d+[.]\\d+(-[a-zA-Z0-9]+)?$")) {
        String message = "Invalid version found for " + RegistryUtils.getResourceName(path);
        log.error(message);
        throw new RegistryException(message);
    }
    if (returnPath.contains(RESOURCE_VERSION)) {
        return returnPath.replace(RESOURCE_VERSION, newResourceVersion);
    }
    return returnPath;
}
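The first while(true) loop discards the placeholders whose indexOf came back negative, meaning they are absent from the expression. Since pollFirstEntry() always removes the smallest key, and negative keys sort first, the same cleanup can be written more directly (with an extra empty-map guard):

// Remove entries whose key is negative (placeholder not present in the expression).
while (!indexMap.isEmpty() && indexMap.firstKey() < 0) {
    indexMap.pollFirstEntry();
}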
From source file:com.joliciel.talismane.parser.TransitionBasedParserImpl.java
@Override
public List<ParseConfiguration> parseSentence(List<PosTagSequence> posTagSequences) {
    MONITOR.startTask("parseSentence");
    try {
        long startTime = (new Date()).getTime();
        int maxAnalysisTimeMilliseconds = maxAnalysisTimePerSentence * 1000;
        int minFreeMemoryBytes = minFreeMemory * KILOBYTE;

        TokenSequence tokenSequence = posTagSequences.get(0).getTokenSequence();

        TreeMap<Integer, PriorityQueue<ParseConfiguration>> heaps = new TreeMap<Integer, PriorityQueue<ParseConfiguration>>();

        PriorityQueue<ParseConfiguration> heap0 = new PriorityQueue<ParseConfiguration>();
        for (PosTagSequence posTagSequence : posTagSequences) {
            // add an initial ParseConfiguration for each postag sequence
            ParseConfiguration initialConfiguration = this.getParserServiceInternal().getInitialConfiguration(posTagSequence);
            initialConfiguration.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(initialConfiguration);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding initial posTagSequence: " + posTagSequence);
            }
        }
        heaps.put(0, heap0);

        PriorityQueue<ParseConfiguration> backupHeap = null;
        PriorityQueue<ParseConfiguration> finalHeap = null;
        PriorityQueue<ParseConfiguration> terminalHeap = new PriorityQueue<ParseConfiguration>();

        while (heaps.size() > 0) {
            Entry<Integer, PriorityQueue<ParseConfiguration>> heapEntry = heaps.pollFirstEntry();
            PriorityQueue<ParseConfiguration> currentHeap = heapEntry.getValue();
            int currentHeapIndex = heapEntry.getKey();
            if (LOG.isTraceEnabled()) {
                LOG.trace("##### Polling next heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size());
            }

            boolean finished = false;
            // systematically set the final heap here, just in case we exit "naturally" with no more heaps
            finalHeap = heapEntry.getValue();
            backupHeap = new PriorityQueue<ParseConfiguration>();

            // we jump out when either (a) all tokens have been attached or (b) we go over the max allotted time
            ParseConfiguration topConf = currentHeap.peek();
            if (topConf.isTerminal()) {
                LOG.trace("Exiting with terminal heap: " + heapEntry.getKey() + ", size: " + heapEntry.getValue().size());
                finished = true;
            }
            if (earlyStop && terminalHeap.size() >= beamWidth) {
                LOG.debug("Early stop activated and terminal heap contains " + beamWidth + " entries. Exiting.");
                finalHeap = terminalHeap;
                finished = true;
            }
            long analysisTime = (new Date()).getTime() - startTime;
            if (maxAnalysisTimePerSentence > 0 && analysisTime > maxAnalysisTimeMilliseconds) {
                LOG.info("Parse tree analysis took too long for sentence: " + tokenSequence.getText());
                LOG.info("Breaking out after " + maxAnalysisTimePerSentence + " seconds.");
                finished = true;
            }
            if (minFreeMemory > 0) {
                long freeMemory = Runtime.getRuntime().freeMemory();
                if (freeMemory < minFreeMemoryBytes) {
                    LOG.info("Not enough memory left to parse sentence: " + tokenSequence.getText());
                    LOG.info("Min free memory (bytes): " + minFreeMemoryBytes);
                    LOG.info("Current free memory (bytes): " + freeMemory);
                    finished = true;
                }
            }
            if (finished) {
                break;
            }

            // limit the breadth to K
            int maxSequences = currentHeap.size() > this.beamWidth ? this.beamWidth : currentHeap.size();

            int j = 0;
            while (currentHeap.size() > 0) {
                ParseConfiguration history = currentHeap.poll();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("### Next configuration on heap " + heapEntry.getKey() + ":");
                    LOG.trace(history.toString());
                    LOG.trace("Score: " + df.format(history.getScore()));
                    LOG.trace(history.getPosTagSequence());
                }

                List<Decision<Transition>> decisions = new ArrayList<Decision<Transition>>();

                // test the positive rules on the current configuration
                boolean ruleApplied = false;
                if (parserPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (ParserRule rule : parserPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.toString());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<Transition> positiveRuleDecision = TalismaneSession.getTransitionSystem()
                                        .createDefaultDecision(rule.getTransition());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting transition to: " + rule.getTransition().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current configuration
                    List<FeatureResult<?>> parseFeatureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("feature analyse");
                    try {
                        for (ParseConfigurationFeature<?> feature : this.parseFeatures) {
                            MONITOR.startTask(feature.getName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(history, env);
                                if (featureResult != null)
                                    parseFeatureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(feature.getName());
                            }
                        }
                        if (LOG_FEATURES.isTraceEnabled()) {
                            for (FeatureResult<?> featureResult : parseFeatureResults) {
                                LOG_FEATURES.trace(featureResult.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("feature analyse");
                    }

                    // evaluate the feature results using the decision maker
                    MONITOR.startTask("make decision");
                    try {
                        decisions = this.decisionMaker.decide(parseFeatureResults);

                        for (ClassificationObserver<Transition> observer : this.observers) {
                            observer.onAnalyse(history, parseFeatureResults, decisions);
                        }

                        List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>(decisions.size());
                        for (Decision<Transition> decision : decisions) {
                            if (decision.getProbability() > MIN_PROB_TO_STORE)
                                decisionShortList.add(decision);
                        }
                        decisions = decisionShortList;
                    } finally {
                        MONITOR.endTask("make decision");
                    }

                    // apply the negative rules
                    Set<Transition> eliminatedTransitions = new HashSet<Transition>();
                    if (parserNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (ParserRule rule : parserNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.toString());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(history, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedTransitions.addAll(rule.getTransitions());
                                    if (LOG.isTraceEnabled()) {
                                        for (Transition eliminatedTransition : rule.getTransitions())
                                            LOG.trace("Rule applies. Eliminating transition: " + eliminatedTransition.getCode());
                                    }
                                }
                            }
                            if (eliminatedTransitions.size() > 0) {
                                List<Decision<Transition>> decisionShortList = new ArrayList<Decision<Transition>>();
                                for (Decision<Transition> decision : decisions) {
                                    if (!eliminatedTransitions.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }
                } // has a positive rule been applied?

                boolean transitionApplied = false;
                // add new configuration to the heap, one for each valid transition
                MONITOR.startTask("heap sort");
                try {
                    // Why apply all decisions here? Why not just the top N (where N = beamWidth)?
                    // Answer: because we're not always adding solutions to the same heap
                    // And yet: a decision here can only do one of two things: process a token (heap+1000), or add a non-processing transition (heap+1)
                    // So, if we've already applied N decisions of each type, we should be able to stop
                    for (Decision<Transition> decision : decisions) {
                        Transition transition = decision.getOutcome();
                        if (LOG.isTraceEnabled())
                            LOG.trace("Outcome: " + transition.getCode() + ", " + decision.getProbability());

                        if (transition.checkPreconditions(history)) {
                            transitionApplied = true;
                            ParseConfiguration configuration = this.parserServiceInternal.getConfiguration(history);
                            if (decision.isStatistical())
                                configuration.addDecision(decision);
                            transition.apply(configuration);

                            int nextHeapIndex = parseComparisonStrategy.getComparisonIndex(configuration) * 1000;
                            if (configuration.isTerminal()) {
                                nextHeapIndex = Integer.MAX_VALUE;
                            } else {
                                while (nextHeapIndex <= currentHeapIndex)
                                    nextHeapIndex++;
                            }

                            PriorityQueue<ParseConfiguration> nextHeap = heaps.get(nextHeapIndex);
                            if (nextHeap == null) {
                                if (configuration.isTerminal())
                                    nextHeap = terminalHeap;
                                else
                                    nextHeap = new PriorityQueue<ParseConfiguration>();
                                heaps.put(nextHeapIndex, nextHeap);
                                if (LOG.isTraceEnabled())
                                    LOG.trace("Created heap with index: " + nextHeapIndex);
                            }
                            nextHeap.add(configuration);
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Added configuration with score " + configuration.getScore() + " to heap: "
                                        + nextHeapIndex + ", total size: " + nextHeap.size());
                            }

                            configuration.clearMemory();
                        } else {
                            if (LOG.isTraceEnabled())
                                LOG.trace("Cannot apply transition: doesn't meet pre-conditions");
                            // just in case we run out of both heaps and analyses, we build this backup heap
                            backupHeap.add(history);
                        } // does transition meet pre-conditions?
                    } // next transition
                } finally {
                    MONITOR.endTask("heap sort");
                }

                if (transitionApplied) {
                    j++;
                } else {
                    LOG.trace("No transitions could be applied: not counting this history as part of the beam");
                }

                // beam width test
                if (j == maxSequences)
                    break;
            } // next history
        } // next atomic index

        // return the best sequences on the heap
        List<ParseConfiguration> bestConfigurations = new ArrayList<ParseConfiguration>();
        int i = 0;

        if (finalHeap.isEmpty())
            finalHeap = backupHeap;

        while (!finalHeap.isEmpty()) {
            bestConfigurations.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }
        if (LOG.isDebugEnabled()) {
            for (ParseConfiguration finalConfiguration : bestConfigurations) {
                LOG.debug(df.format(finalConfiguration.getScore()) + ": " + finalConfiguration.toString());
                LOG.debug("Pos tag sequence: " + finalConfiguration.getPosTagSequence());
                LOG.debug("Transitions: " + finalConfiguration.getTransitions());
                LOG.debug("Decisions: " + finalConfiguration.getDecisions());
                if (LOG.isTraceEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    for (Decision<Transition> decision : finalConfiguration.getDecisions()) {
                        sb.append(" * ");
                        sb.append(df.format(decision.getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getTransitions().size());
                    LOG.trace(sb.toString());

                    sb = new StringBuilder();
                    sb.append(" * PosTag sequence score ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getScore()));
                    sb.append(" = ");
                    for (PosTaggedToken posTaggedToken : finalConfiguration.getPosTagSequence()) {
                        sb.append(" * ");
                        sb.append(df.format(posTaggedToken.getDecision().getProbability()));
                    }
                    sb.append(" root ");
                    sb.append(finalConfiguration.getPosTagSequence().size());
                    LOG.trace(sb.toString());

                    sb = new StringBuilder();
                    sb.append(" * Token sequence score = ");
                    sb.append(df.format(finalConfiguration.getPosTagSequence().getTokenSequence().getScore()));
                    LOG.trace(sb.toString());
                }
            }
        }
        return bestConfigurations;
    } finally {
        MONITOR.endTask("parseSentence");
    }
}
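This is the same agenda pattern as the pos-tagger and OCR examples above, with one refinement in how the keys are spaced: the comparison index is multiplied by 1000 for token-consuming transitions, nudged just past the current index for non-consuming ones, and set to Integer.MAX_VALUE for terminal configurations, so pollFirstEntry() exhausts every partial frontier before ever touching the terminal heap.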