List of usage examples for java.util.PriorityQueue.size()
Method signature: public int size(). Returns the number of elements in this queue.
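Before the examples quoted from real projects, here is a minimal, self-contained sketch of what size() reports on a PriorityQueue. The class name PriorityQueueSizeExample and the sample values are illustrative only and do not come from any of the source files below.

import java.util.PriorityQueue;

public class PriorityQueueSizeExample {
    public static void main(String[] args) {
        PriorityQueue<String> queue = new PriorityQueue<>();
        queue.add("banana");
        queue.add("apple");

        // size() reports the current number of elements, regardless of ordering
        System.out.println(queue.size());   // 2

        queue.poll();                        // removes the head ("apple")
        System.out.println(queue.size());   // 1
    }
}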
From source file:com.joliciel.talismane.posTagger.PosTaggerImpl.java
@Override
public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) {
    MONITOR.startTask("tagSentence");
    try {
        MONITOR.startTask("apply filters");
        try {
            for (TokenSequence tokenSequence : tokenSequences) {
                for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) {
                    tokenFilter.apply(tokenSequence);
                }
            }
        } finally {
            MONITOR.endTask("apply filters");
        }
        int sentenceLength = tokenSequences.get(0).getText().length();

        TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>();

        PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>();
        for (TokenSequence tokenSequence : tokenSequences) {
            // add an empty PosTagSequence for each token sequence
            PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0);
            emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
            heap0.add(emptySequence);
        }
        heaps.put(0.0, heap0);

        PriorityQueue<PosTagSequence> finalHeap = null;
        while (heaps.size() > 0) {
            Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry();
            if (LOG.isTraceEnabled()) {
                LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength);
            }
            if (heapEntry.getKey() == sentenceLength) {
                finalHeap = heapEntry.getValue();
                break;
            }
            PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue();

            // limit the breadth to K
            int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();

            for (int j = 0; j < maxSequences; j++) {
                PosTagSequence history = previousHeap.poll();
                Token token = history.getNextToken();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString());
                    LOG.trace("Prob: " + df.format(history.getScore()));
                    LOG.trace("Token: " + token.getText());

                    StringBuilder sb = new StringBuilder();
                    for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) {
                        if (oneToken.equals(token))
                            sb.append("[" + oneToken + "]");
                        else
                            sb.append(oneToken);
                    }
                    LOG.trace(sb.toString());
                }

                PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history);
                List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>();

                // test the positive rules on the current token
                boolean ruleApplied = false;
                if (posTaggerPositiveRules != null) {
                    MONITOR.startTask("check rules");
                    try {
                        for (PosTaggerRule rule : posTaggerPositiveRules) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Checking rule: " + rule.getCondition().getName());
                            }
                            RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                            FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                            if (ruleResult != null && ruleResult.getOutcome()) {
                                Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet()
                                        .createDefaultDecision(rule.getTag());
                                decisions.add(positiveRuleDecision);
                                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                                ruleApplied = true;
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Rule applies. Setting posTag to: " + rule.getTag().getCode());
                                }
                                break;
                            }
                        }
                    } finally {
                        MONITOR.endTask("check rules");
                    }
                }

                if (!ruleApplied) {
                    // test the features on the current token
                    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
                    MONITOR.startTask("analyse features");
                    try {
                        for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) {
                            MONITOR.startTask(posTaggerFeature.getCollectionName());
                            try {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = posTaggerFeature.check(context, env);
                                if (featureResult != null)
                                    featureResults.add(featureResult);
                            } finally {
                                MONITOR.endTask(posTaggerFeature.getCollectionName());
                            }
                        }
                        if (LOG.isTraceEnabled()) {
                            for (FeatureResult<?> result : featureResults) {
                                LOG.trace(result.toString());
                            }
                        }
                    } finally {
                        MONITOR.endTask("analyse features");
                    }

                    // evaluate the feature results using the maxent model
                    MONITOR.startTask("make decision");
                    decisions = this.decisionMaker.decide(featureResults);
                    MONITOR.endTask("make decision");

                    for (ClassificationObserver<PosTag> observer : this.observers) {
                        observer.onAnalyse(token, featureResults, decisions);
                    }

                    // apply the negative rules
                    Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>();
                    if (posTaggerNegativeRules != null) {
                        MONITOR.startTask("check negative rules");
                        try {
                            for (PosTaggerRule rule : posTaggerNegativeRules) {
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Checking negative rule: " + rule.getCondition().getName());
                                }
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                                if (ruleResult != null && ruleResult.getOutcome()) {
                                    eliminatedPosTags.add(rule.getTag());
                                    if (LOG.isTraceEnabled()) {
                                        LOG.trace("Rule applies. Eliminating posTag: " + rule.getTag().getCode());
                                    }
                                }
                            }
                            if (eliminatedPosTags.size() > 0) {
                                List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                                for (Decision<PosTag> decision : decisions) {
                                    if (!eliminatedPosTags.contains(decision.getOutcome())) {
                                        decisionShortList.add(decision);
                                    } else {
                                        LOG.trace("Eliminating decision: " + decision.toString());
                                    }
                                }
                                if (decisionShortList.size() > 0) {
                                    decisions = decisionShortList;
                                } else {
                                    LOG.debug("All decisions eliminated! Restoring original decisions.");
                                }
                            }
                        } finally {
                            MONITOR.endTask("check negative rules");
                        }
                    }

                    // is this a known word in the lexicon?
                    MONITOR.startTask("apply constraints");
                    try {
                        if (LOG.isTraceEnabled()) {
                            String posTags = "";
                            for (PosTag onePosTag : token.getPossiblePosTags()) {
                                posTags += onePosTag.getCode() + ",";
                            }
                            LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags);
                        }
                        List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                        for (Decision<PosTag> decision : decisions) {
                            if (decision.getProbability() >= MIN_PROB_TO_STORE) {
                                decisionShortList.add(decision);
                            }
                        }
                        if (decisionShortList.size() > 0) {
                            decisions = decisionShortList;
                        }
                    } finally {
                        MONITOR.endTask("apply constraints");
                    }
                } // has a rule been applied?

                // add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt
                MONITOR.startTask("heap sort");
                for (Decision<PosTag> decision : decisions) {
                    if (LOG.isTraceEnabled())
                        LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability());

                    PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token, decision);
                    PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history);
                    sequence.addPosTaggedToken(posTaggedToken);
                    if (decision.isStatistical())
                        sequence.addDecision(decision);

                    double heapIndex = token.getEndIndex();
                    // add another half for an empty token, to differentiate it from regular ones
                    if (token.getStartIndex() == token.getEndIndex())
                        heapIndex += 0.5;

                    // if it's the last token, make sure we end
                    if (token.getIndex() == sequence.getTokenSequence().size() - 1)
                        heapIndex = sentenceLength;

                    if (LOG.isTraceEnabled())
                        LOG.trace("Heap index: " + heapIndex);

                    PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex);
                    if (heap == null) {
                        heap = new PriorityQueue<PosTagSequence>();
                        heaps.put(heapIndex, heap);
                    }
                    heap.add(sequence);
                } // next outcome for this token
                MONITOR.endTask("heap sort");
            } // next history
        } // next atomic index

        // return the best sequence on the heap
        List<PosTagSequence> sequences = new ArrayList<PosTagSequence>();
        int i = 0;
        while (!finalHeap.isEmpty()) {
            sequences.add(finalHeap.poll());
            i++;
            if (i >= this.getBeamWidth())
                break;
        }

        // apply post-processing filters
        LOG.debug("####Final postag sequences:");
        int j = 1;
        for (PosTagSequence sequence : sequences) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Sequence before filters: " + sequence);
            }
            for (PosTagSequenceFilter filter : this.postProcessingFilters)
                filter.apply(sequence);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Sequence after filters: " + sequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tagSentence");
    }
}
From source file:com.joliciel.talismane.tokeniser.patterns.IntervalPatternTokeniser.java
@Override
public List<TokenisedAtomicTokenSequence> tokeniseWithDecisions(Sentence sentence) {
    MONITOR.startTask("tokeniseWithDecisions");
    try {
        // apply any pre-tokenisation decisions via filters
        // we only want one placeholder per start index - the first one that gets added
        Map<Integer, TokenPlaceholder> placeholderMap = new HashMap<Integer, TokenPlaceholder>();
        for (TokenFilter tokenFilter : this.tokenFilters) {
            Set<TokenPlaceholder> myPlaceholders = tokenFilter.apply(sentence.getText());
            for (TokenPlaceholder placeholder : myPlaceholders) {
                if (!placeholderMap.containsKey(placeholder.getStartIndex())) {
                    placeholderMap.put(placeholder.getStartIndex(), placeholder);
                }
            }
            if (LOG.isTraceEnabled()) {
                if (myPlaceholders.size() > 0) {
                    LOG.trace("TokenFilter: " + tokenFilter);
                    LOG.trace("placeholders: " + myPlaceholders);
                }
            }
        }
        Set<TokenPlaceholder> placeholders = new HashSet<TokenPlaceholder>(placeholderMap.values());

        // Initially, separate the sentence into tokens using the separators provided
        TokenSequence tokenSequence = this.tokeniserService.getTokenSequence(sentence, Tokeniser.SEPARATORS,
                placeholders);

        // apply any pre-processing filters that have been added
        for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
            tokenSequenceFilter.apply(tokenSequence);
        }

        // Assign each separator its default value
        List<TokeniserOutcome> defaultOutcomes = this.tokeniserPatternManager.getDefaultOutcomes(tokenSequence);
        List<Decision<TokeniserOutcome>> defaultDecisions = new ArrayList<Decision<TokeniserOutcome>>(
                defaultOutcomes.size());
        for (TokeniserOutcome outcome : defaultOutcomes) {
            Decision<TokeniserOutcome> tokeniserDecision = this.tokeniserDecisionFactory.createDefaultDecision(outcome);
            tokeniserDecision.addAuthority("_" + this.getClass().getSimpleName());
            tokeniserDecision.addAuthority("_" + "DefaultDecision");
            defaultDecisions.add(tokeniserDecision);
        }

        List<TokenisedAtomicTokenSequence> sequences = null;

        // For each test pattern, see if anything in the sentence matches it
        if (this.decisionMaker != null) {
            Set<Token> tokensToCheck = new HashSet<Token>();
            MONITOR.startTask("pattern matching");
            try {
                for (TokenPattern parsedPattern : this.getTokeniserPatternManager().getParsedTestPatterns()) {
                    Set<Token> tokensToCheckForThisPattern = new HashSet<Token>();
                    List<TokenPatternMatchSequence> matchesForThisPattern = parsedPattern.match(tokenSequence);
                    for (TokenPatternMatchSequence tokenPatternMatch : matchesForThisPattern) {
                        if (LOG.isTraceEnabled())
                            tokensToCheckForThisPattern.addAll(tokenPatternMatch.getTokensToCheck());
                        tokensToCheck.addAll(tokenPatternMatch.getTokensToCheck());
                    }
                    if (LOG.isTraceEnabled()) {
                        if (tokensToCheckForThisPattern.size() > 0) {
                            LOG.trace("Parsed pattern: " + parsedPattern);
                            LOG.trace("tokensToCheck: " + tokensToCheckForThisPattern);
                        }
                    }
                }
            } finally {
                MONITOR.endTask("pattern matching");
            }

            // we want to create the n most likely token sequences
            // the sequence has to correspond to a token pattern
            // initially create a heap with a single, empty sequence
            PriorityQueue<TokenisedAtomicTokenSequence> heap = new PriorityQueue<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence emptySequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            heap.add(emptySequence);

            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Token : \"" + token.getText() + "\"");
                }

                // build a new heap for this iteration
                PriorityQueue<TokenisedAtomicTokenSequence> previousHeap = heap;
                heap = new PriorityQueue<TokenisedAtomicTokenSequence>();

                // limit the heap breadth to K
                int maxSequences = previousHeap.size() > this.getBeamWidth() ? this.getBeamWidth()
                        : previousHeap.size();

                for (int j = 0; j < maxSequences; j++) {
                    TokenisedAtomicTokenSequence history = previousHeap.poll();

                    // Find the separating & non-separating decisions
                    List<Decision<TokeniserOutcome>> decisions = null;
                    if (tokensToCheck.contains(token)) {
                        // test the features on the current token
                        TokeniserContext context = new TokeniserContext(token, history);
                        List<FeatureResult<?>> tokenFeatureResults = new ArrayList<FeatureResult<?>>();
                        MONITOR.startTask("analyse features");
                        try {
                            for (TokeniserContextFeature<?> feature : tokeniserContextFeatures) {
                                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                                FeatureResult<?> featureResult = feature.check(context, env);
                                if (featureResult != null) {
                                    tokenFeatureResults.add(featureResult);
                                }
                            }
                            if (LOG.isTraceEnabled()) {
                                for (FeatureResult<?> featureResult : tokenFeatureResults) {
                                    LOG.trace(featureResult.toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("analyse features");
                        }
                        MONITOR.startTask("make decision");
                        try {
                            decisions = this.decisionMaker.decide(tokenFeatureResults);
                            for (ClassificationObserver<TokeniserOutcome> observer : this.observers)
                                observer.onAnalyse(token, tokenFeatureResults, decisions);
                            for (Decision<TokeniserOutcome> decision : decisions) {
                                decision.addAuthority(this.getClass().getSimpleName());
                                for (TokenPatternMatch tokenMatch : token.getMatches()) {
                                    decision.addAuthority(tokenMatch.getPattern().toString());
                                }
                            }
                        } finally {
                            MONITOR.endTask("make decision");
                        }
                    } else {
                        decisions = new ArrayList<Decision<TokeniserOutcome>>();
                        decisions.add(defaultDecisions.get(i));
                    }

                    MONITOR.startTask("heap sort");
                    try {
                        for (Decision<TokeniserOutcome> decision : decisions) {
                            TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                                    decision);
                            TokenisedAtomicTokenSequence tokenisedSequence = this.getTokeniserService()
                                    .getTokenisedAtomicTokenSequence(history);
                            tokenisedSequence.add(taggedToken);
                            if (decision.isStatistical())
                                tokenisedSequence.addDecision(decision);
                            heap.add(tokenisedSequence);
                        }
                    } finally {
                        MONITOR.endTask("heap sort");
                    }
                } // next sequence in the old heap
                i++;
            } // next token

            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            i = 0;
            while (!heap.isEmpty()) {
                sequences.add(heap.poll());
                i++;
                if (i >= this.getBeamWidth())
                    break;
            }
        } else {
            sequences = new ArrayList<TokenisedAtomicTokenSequence>();
            TokenisedAtomicTokenSequence defaultSequence = this.getTokeniserService()
                    .getTokenisedAtomicTokenSequence(sentence, 0);
            int i = 0;
            for (Token token : tokenSequence.listWithWhiteSpace()) {
                TaggedToken<TokeniserOutcome> taggedToken = this.tokeniserService.getTaggedToken(token,
                        defaultDecisions.get(i++));
                defaultSequence.add(taggedToken);
            }
            sequences.add(defaultSequence);
        } // have decision maker?

        LOG.debug("####Final token sequences:");
        int j = 1;
        for (TokenisedAtomicTokenSequence sequence : sequences) {
            TokenSequence newTokenSequence = sequence.inferTokenSequence();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Token sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
                LOG.debug("Atomic sequence: " + sequence);
                LOG.debug("Resulting sequence: " + newTokenSequence);
            }
            // need to re-apply the pre-processing filters, because the tokens are all new
            // Question: why can't we conserve the initial tokens when they haven't changed at all?
            // Answer: because the tokenSequence and index in the sequence is referenced by the token.
            // Question: should we create a separate class, Token and TokenInSequence,
            // one with index & sequence access & one without?
            for (TokenSequenceFilter tokenSequenceFilter : this.tokenSequenceFilters) {
                tokenSequenceFilter.apply(newTokenSequence);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("After filters: " + newTokenSequence);
            }
        }

        return sequences;
    } finally {
        MONITOR.endTask("tokeniseWithDecisions");
    }
}
From source file:eu.stratosphere.pact.test.util.TestBase.java
/**
 * Compares the expectedResultString and the file(s) in the HDFS linewise.
 * Both results (expected and computed) are held in memory. Hence, this
 * method should not be used to compare large results.
 *
 * The line comparator is used to compare lines from the expected and result set.
 *
 * @param expectedResult
 * @param hdfsPath
 * @param comp Line comparator
 */
protected void compareResultsByLinesInMemory(String expectedResultStr, String resultPath,
        Comparator<String> comp) throws Exception {
    ArrayList<String> resultFiles = new ArrayList<String>();

    // Determine all result files
    if (getFilesystemProvider().isDir(resultPath)) {
        for (String file : getFilesystemProvider().listFiles(resultPath)) {
            if (!getFilesystemProvider().isDir(file)) {
                resultFiles.add(resultPath + "/" + file);
            }
        }
    } else {
        resultFiles.add(resultPath);
    }

    // collect lines of all result files
    PriorityQueue<String> computedResult = new PriorityQueue<String>();
    for (String resultFile : resultFiles) {
        // read each result file
        InputStream is = getFilesystemProvider().getInputStream(resultFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();

        // collect lines
        while (line != null) {
            computedResult.add(line);
            line = reader.readLine();
        }
        reader.close();
    }

    PriorityQueue<String> expectedResult = new PriorityQueue<String>();
    StringTokenizer st = new StringTokenizer(expectedResultStr, "\n");
    while (st.hasMoreElements()) {
        expectedResult.add(st.nextToken());
    }

    // log expected and computed results
    if (LOG.isDebugEnabled()) {
        LOG.debug("Expected: " + expectedResult);
        LOG.debug("Computed: " + computedResult);
    }

    Assert.assertEquals("Computed and expected results have different size", expectedResult.size(),
            computedResult.size());

    while (!expectedResult.isEmpty()) {
        String expectedLine = expectedResult.poll();
        String computedLine = computedResult.poll();

        if (LOG.isDebugEnabled())
            LOG.debug("expLine: <" + expectedLine + ">\t\t: compLine: <" + computedLine + ">");

        Assert.assertEquals("Computed and expected lines differ", expectedLine, computedLine);
    }
}
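The example above uses two priority queues as a cheap way to compare two multisets of lines in sorted order: size() checks that both sides hold the same number of lines before they are polled in lockstep. A stripped-down sketch of the same idea is given below; the class name SortedComparison and the method assertSameLines are illustrative, and JUnit 4's org.junit.Assert is assumed to be available, as it is in the example above.

import java.util.PriorityQueue;
import org.junit.Assert;

final class SortedComparison {
    // Compares two collections of lines irrespective of their original order.
    static void assertSameLines(Iterable<String> expected, Iterable<String> actual) {
        PriorityQueue<String> exp = new PriorityQueue<>();
        PriorityQueue<String> act = new PriorityQueue<>();
        expected.forEach(exp::add);
        actual.forEach(act::add);

        // size() makes the "same number of lines" check explicit before the element-wise comparison
        Assert.assertEquals("Different number of lines", exp.size(), act.size());
        while (!exp.isEmpty()) {
            Assert.assertEquals(exp.poll(), act.poll()); // both polls return the smallest remaining line
        }
    }
}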
From source file:edu.snu.leader.hierarchy.simple.test.TestIndividual.java
/**
 * Initialize the individual
 *
 * @param allIndividuals
 */
public void initialize(List<TestIndividual> allIndividuals) {
    // Basically, we just need to find our neighbors
    // Build a priority queue to sort things for us
    PriorityQueue<TestNeighbor> sortedNeighbors = new PriorityQueue<TestNeighbor>();

    // Iterate through all the individuals
    Iterator<TestIndividual> indIter = allIndividuals.iterator();
    while (indIter.hasNext()) {
        // Get the individual
        TestIndividual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        TestNeighbor neighbor = new TestNeighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }
}
From source file:edu.utsa.sifter.som.MainSOM.java
void initTerms() throws IOException {
    final Terms terms = MultiFields.getTerms(Reader, "body");
    System.out.println("number of terms in index: " + terms.size());

    final PriorityQueue<TermPair> topTerms = new PriorityQueue<TermPair>(Conf.MAX_VECTOR_FEATURES,
            new TermPair.TermPairComparator());

    int num = 0;
    TermsEnum term = terms.iterator(null);
    while (term.next() != null) {
        final int count = term.docFreq();
        final double r = ((double) count) / Reader.numDocs();
        if (Conf.DOC_FREQ_THRESHOLD_LOW <= r && r <= Conf.DOC_FREQ_THRESHOLD_HIGH) {
            final String s = term.term().utf8ToString();
            if (s.length() >= Conf.MIN_SOM_TERM_LENGTH) {
                if (topTerms.size() < Conf.MAX_VECTOR_FEATURES) {
                    topTerms.add(new TermPair(s, count));
                } else if (topTerms.peek().DocCount < count) {
                    topTerms.remove();
                    topTerms.add(new TermPair(s, count));
                }
                ++num;
            }
        }
    }
    System.out.println(num + " terms with in doc frequency range");

    final int numFeatures = Math.min(topTerms.size(), Conf.MAX_VECTOR_FEATURES);
    TermIndices = new HashMap<String, Integer>((numFeatures * 4 + 1) / 3); // respect load factor
    Terms = new java.util.Vector<String>(numFeatures);
    Terms.setSize(numFeatures);

    System.out.println("the top " + numFeatures + " features will be used");
    for (int i = numFeatures - 1; i > -1; --i) { // reverse order, to put top terms first
        TermPair t = topTerms.poll(); // least remaining
        TermIndices.put(t.Term, i);
        Terms.set(i, t.Term);
        // System.out.println("Including term " + t.Term + " (" + t.DocCount + ")");
    }
}
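The pattern above, where size() caps the queue at a fixed capacity and peek()/remove() evict the weakest element when a better candidate arrives, is a common way to keep only the top K items. Below is a minimal, generic sketch of that idiom; the class name TopK, the method topK, and the sample values are illustrative and not taken from the Sifter code.

import java.util.PriorityQueue;

final class TopK {
    // Keeps the k largest values seen so far; the queue's head is the smallest retained value.
    static PriorityQueue<Integer> topK(int[] values, int k) {
        PriorityQueue<Integer> heap = new PriorityQueue<>(k);
        for (int v : values) {
            if (heap.size() < k) {
                heap.add(v);               // still under capacity
            } else if (heap.peek() < v) {  // better than the weakest element kept so far
                heap.remove();             // evict the current minimum
                heap.add(v);
            }
        }
        return heap;
    }

    public static void main(String[] args) {
        System.out.println(topK(new int[] {5, 1, 9, 3, 7}, 3)); // retains 5, 7, 9 (heap order may vary)
    }
}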
From source file:org.mule.util.store.MonitoredObjectStoreWrapper.java
public void expire() {
    try {
        final long now = System.nanoTime();
        List<Serializable> keys = allKeys();
        int excess = (allKeys().size() - maxEntries);

        if (maxEntries > 0 && excess > 0) {
            PriorityQueue<StoredObject<T>> q = new PriorityQueue<StoredObject<T>>(excess,
                    new Comparator<StoredObject<T>>() {
                        @Override
                        public int compare(StoredObject<T> paramT1, StoredObject<T> paramT2) {
                            return paramT2.timestamp.compareTo(paramT1.timestamp);
                        }
                    });

            long youngest = Long.MAX_VALUE;
            for (Serializable key : keys) {
                StoredObject<T> obj = getStore().retrieve(key);
                //TODO extract the entryTTL>0 outside of loop
                if (entryTTL > 0 && TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                    remove(key);
                    excess--;
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                } else {
                    if (excess > 0 && (q.size() < excess || obj.timestamp < youngest)) {
                        q.offer(obj);
                        youngest = q.peek().timestamp;
                    }
                    if (excess > 0 && q.size() > excess) {
                        q.poll();
                        youngest = q.peek().timestamp;
                    }
                }
            }

            for (int i = 0; i < excess; i++) {
                Serializable key = q.poll().key;
                remove(key);
            }
        } else {
            if (entryTTL > 0) {
                for (Serializable key : keys) {
                    StoredObject<T> obj = getStore().retrieve(key);
                    if (TimeUnit.NANOSECONDS.toMillis(now - obj.getTimestamp()) >= entryTTL) {
                        remove(key);
                    }
                }
            }
        }
    } catch (Exception e) {
        logger.warn("Running expirty on " + baseStore + " threw " + e + ":" + e.getMessage());
    }
}
From source file:edu.snu.leader.hierarchy.simple.Individual.java
/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
private void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<Individual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        Individual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}
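This example and the TestIndividual example above both use size() to bound how many elements are drained from the queue: poll() is called min(size(), K) times, so the loop never polls an empty queue. A minimal, generic sketch of that idiom follows; the class name KSmallest, the method kSmallest, and the sample values are illustrative, and List.of requires Java 9 or later.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

final class KSmallest {
    // Returns up to k smallest values; size() guards against asking for more than the queue holds.
    static List<Double> kSmallest(PriorityQueue<Double> queue, int k) {
        List<Double> result = new ArrayList<>();
        int count = Math.min(queue.size(), k);
        for (int i = 0; i < count; i++) {
            result.add(queue.poll()); // poll() always returns the current minimum
        }
        return result;
    }

    public static void main(String[] args) {
        PriorityQueue<Double> distances = new PriorityQueue<>(List.of(3.2, 0.5, 1.7, 4.1));
        System.out.println(kSmallest(distances, 3)); // [0.5, 1.7, 3.2]
    }
}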
From source file:org.apache.storm.daemon.logviewer.utils.DirectoryCleaner.java
/**
 * If totalSize of files exceeds the either the per-worker quota or global quota,
 * Logviewer deletes oldest inactive log files in a worker directory or in all worker dirs.
 * We use the parameter forPerDir to switch between the two deletion modes.
 *
 * @param dirs the list of directories to be scanned for deletion
 * @param quota the per-dir quota or the total quota for the all directories
 * @param forPerDir if true, deletion happens for a single dir; otherwise, for all directories globally
 * @param activeDirs only for global deletion, we want to skip the active logs in activeDirs
 * @return number of files deleted
 */
public DeletionMeta deleteOldestWhileTooLarge(List<Path> dirs, long quota, boolean forPerDir,
        Set<Path> activeDirs) throws IOException {
    long totalSize = 0;
    for (Path dir : dirs) {
        try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
            for (Path path : stream) {
                totalSize += Files.size(path);
            }
        }
    }
    LOG.debug("totalSize: {} quota: {}", totalSize, quota);
    long toDeleteSize = totalSize - quota;
    if (toDeleteSize <= 0) {
        return DeletionMeta.EMPTY;
    }

    int deletedFiles = 0;
    long deletedSize = 0;
    // the oldest pq_size files in this directory will be placed in PQ, with the newest at the root
    PriorityQueue<Pair<Path, FileTime>> pq = new PriorityQueue<>(PQ_SIZE,
            Comparator.comparing((Pair<Path, FileTime> p) -> p.getRight()).reversed());
    int round = 0;
    final Set<Path> excluded = new HashSet<>();
    while (toDeleteSize > 0) {
        LOG.debug("To delete size is {}, start a new round of deletion, round: {}", toDeleteSize, round);
        for (Path dir : dirs) {
            try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
                for (Path path : stream) {
                    if (!excluded.contains(path)) {
                        if (isFileEligibleToSkipDelete(forPerDir, activeDirs, dir, path)) {
                            excluded.add(path);
                        } else {
                            Pair<Path, FileTime> p = Pair.of(path, Files.getLastModifiedTime(path));
                            if (pq.size() < PQ_SIZE) {
                                pq.offer(p);
                            } else if (p.getRight().toMillis() < pq.peek().getRight().toMillis()) {
                                pq.poll();
                                pq.offer(p);
                            }
                        }
                    }
                }
            }
        }
        if (!pq.isEmpty()) {
            // need to reverse the order of elements in PQ to delete files from oldest to newest
            Stack<Pair<Path, FileTime>> stack = new Stack<>();
            while (!pq.isEmpty()) {
                stack.push(pq.poll());
            }
            while (!stack.isEmpty() && toDeleteSize > 0) {
                Pair<Path, FileTime> pair = stack.pop();
                Path file = pair.getLeft();
                final String canonicalPath = file.toAbsolutePath().normalize().toString();
                final long fileSize = Files.size(file);
                final long lastModified = pair.getRight().toMillis();
                //Original implementation doesn't actually check if delete succeeded or not.
                try {
                    Utils.forceDelete(file.toString());
                    LOG.info("Delete file: {}, size: {}, lastModified: {}", canonicalPath, fileSize, lastModified);
                    toDeleteSize -= fileSize;
                    deletedSize += fileSize;
                    deletedFiles++;
                } catch (IOException e) {
                    excluded.add(file);
                }
            }
            pq.clear();
            round++;
            if (round >= MAX_ROUNDS) {
                if (forPerDir) {
                    LOG.warn("Reach the MAX_ROUNDS: {} during per-dir deletion, you may have too many files in "
                            + "a single directory : {}, will delete the rest files in next interval.",
                            MAX_ROUNDS, dirs.get(0).toAbsolutePath().normalize());
                } else {
                    LOG.warn("Reach the MAX_ROUNDS: {} during global deletion, you may have too many files, "
                            + "will delete the rest files in next interval.", MAX_ROUNDS);
                }
                break;
            }
        } else {
            LOG.warn("No more files able to delete this round, but {} is over quota by {} MB",
                    forPerDir ? "this directory" : "root directory", toDeleteSize * 1e-6);
        }
    }
    return new DeletionMeta(deletedSize, deletedFiles);
}
From source file:io.warp10.script.functions.OPTDTW.java
@Override
public Object apply(WarpScriptStack stack) throws WarpScriptException {
    Object o = stack.pop();

    if (!(o instanceof Number)) {
        throw new WarpScriptException(getName() + " expects a count of best restults to return on top of the stack.");
    }

    int count = ((Number) o).intValue();

    o = stack.pop();

    if (!(o instanceof List)) {
        throw new WarpScriptException(getName() + " expects a numeric list to use as query below the count.");
    }

    double[] query = new double[((List) o).size()];
    int i = 0;
    for (Object oo : (List) o) {
        query[i++] = ((Number) oo).doubleValue();
    }

    // Z-Normalize query
    double[] musigma = DoubleUtils.musigma(query, true);
    for (i = 0; i < query.length; i++) {
        query[i] = (query[i] - musigma[0]) / musigma[1];
    }

    o = stack.pop();

    if (!(o instanceof List)) {
        throw new WarpScriptException(getName()
                + " expects a numeric list as the sequence in which to find best matches below the 'query' list.");
    }

    double[] sequence = new double[((List) o).size()];
    i = 0;
    for (Object oo : (List) o) {
        sequence[i++] = ((Number) oo).doubleValue();
    }

    if (sequence.length <= query.length) {
        throw new WarpScriptException(getName() + " expects the query list to be shorter than the sequence list.");
    }

    double mindist = 0.0;

    PriorityQueue<Pair<Integer, Double>> distances = new PriorityQueue<Pair<Integer, Double>>(
            new Comparator<Pair<Integer, Double>>() {
                @Override
                public int compare(Pair<Integer, Double> o1, Pair<Integer, Double> o2) {
                    return o1.getValue().compareTo(o2.getValue());
                }
            });

    double[] subsequence = new double[query.length];

    for (i = 0; i <= sequence.length - query.length; i++) {
        System.arraycopy(sequence, i, subsequence, 0, query.length);

        // Z-Normalize the subsequence
        musigma = DoubleUtils.musigma(subsequence, true);
        for (int j = 0; j < subsequence.length; j++) {
            subsequence[j] = (subsequence[j] - musigma[0]) / musigma[1];
        }

        double dist = dtw.compute(query, 0, query.length, subsequence, 0, query.length, mindist);

        if (dist < 0) {
            continue;
        }

        distances.add(new Pair<Integer, Double>(i, dist));

        //
        // If the priority queue is of 'count' size, retrieve the largest distance and
        // use it as the threshold for the DTW computation
        //

        if (count > 0 && distances.size() >= count) {
            Object adist[] = distances.toArray();
            mindist = ((Pair<Integer, Double>) adist[count - 1]).getValue();
        }
    }

    List<List<Object>> results = new ArrayList<List<Object>>();

    while (!distances.isEmpty()) {
        Pair<Integer, Double> entry = distances.poll();

        List<Object> result = new ArrayList<Object>();
        result.add(entry.getKey());
        result.add(entry.getValue());
        results.add(result);

        if (count > 0 && count == results.size()) {
            break;
        }
    }

    stack.push(results);

    return stack;
}
From source file:delfos.group.grs.consensus.ConsensusGRS.java
public File getConsensusOutputXMLwithDesiredConsensusDegree(File consensusInputXML, double consensusDegree) {
    File consensusOutputDirectory = (File) getParameterValue(CONSENSUS_OUTPUT_FILES_DIRECTORY);

    String consensusInputXMLFileNameNoExtension = consensusInputXML.getName().substring(0,
            consensusInputXML.getName().lastIndexOf("."));

    String consensusInputXMLInOutputDirectoryAbsolutePath = consensusOutputDirectory.getAbsolutePath()
            + File.separator + consensusInputXMLFileNameNoExtension;

    File consensusInputXMLInOutputDirectory = new File(consensusInputXMLInOutputDirectoryAbsolutePath);

    if (!consensusInputXML.exists()) {
        Global.showWarning("The input XML '" + consensusInputXMLInOutputDirectory
                + "' does not exists in the output directory");
        return null;
    }
    if (!consensusOutputDirectory.exists()) {
        Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' not exists");
        return null;
    }
    if (!consensusOutputDirectory.isDirectory()) {
        Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' is not a directory");
        return null;
    }

    List<File> childrenFiles = new ArrayList<>(Arrays.asList(consensusOutputDirectory.listFiles()));
    PriorityQueue<PriorityItem<File>> queue = new PriorityQueue<>(Collections.reverseOrder());

    for (File consensusOutputFile : childrenFiles) {
        final String outputFileNameNoExtension = consensusOutputFile.getName().substring(0,
                consensusOutputFile.getName().lastIndexOf("."));

        if (outputFileNameNoExtension.startsWith(consensusInputXMLFileNameNoExtension)
                && outputFileNameNoExtension.contains("Consenso")) {
            try {
                Global.showln(consensusOutputFile.getAbsolutePath());
                double thisFileConsensusDegree = ConsensusOfIndividualRecommendationsToXML
                        .readConsensusOutputXML(consensusOutputFile).consensusDegree;

                queue.add(new PriorityItem<>(consensusOutputFile, thisFileConsensusDegree));
            } catch (JDOMException | IOException ex) {
                Global.showWarning(ex);
            }
        }
    }

    if (queue.isEmpty()) {
        return null;
    }

    if (Global.isVerboseAnnoying()) {
        Global.showInfoMessage("Found " + queue.size() + " consensus files");
    }

    while (!queue.isEmpty()) {
        PriorityItem<File> priorityItem = queue.poll();

        double consensusDegreeThisFile = priorityItem.getPriority();

        if (consensusDegreeThisFile >= consensusDegree) {
            return priorityItem.getKey();
        }
    }

    throw new IllegalStateException(
            "Consensus degree not reached for '" + consensusInputXMLFileNameNoExtension + "'");
}