List of usage examples for java.util.PriorityQueue.add
public boolean add(E e)
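add(E e) inserts the specified element into the priority queue and returns true. For this unbounded queue it behaves like offer(e); it throws NullPointerException for null elements and ClassCastException when no comparator is supplied and the element cannot be compared with those already in the queue. A minimal, self-contained sketch (not taken from any of the projects below):

import java.util.PriorityQueue;

public class PriorityQueueAddDemo {
    public static void main(String[] args) {
        // With no comparator, elements are ordered by their natural ordering (here: ascending int)
        PriorityQueue<Integer> pq = new PriorityQueue<>();
        pq.add(42);            // returns true
        pq.add(7);
        pq.offer(19);          // equivalent to add() for an unbounded queue
        System.out.println(pq.peek());      // 7 -- the smallest element sits at the head
        while (!pq.isEmpty()) {
            System.out.println(pq.poll());  // drains in ascending order: 7, 19, 42
        }
        // pq.add(null) would throw NullPointerException
    }
}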
From source file:com.linkedin.pinot.routing.builder.GeneratorBasedRoutingTableBuilder.java
@Override
public List<ServerToSegmentSetMap> computeRoutingTableFromExternalView(String tableName,
    ExternalView externalView, List<InstanceConfig> instanceConfigList) {
  // The default routing table algorithm tries to balance all available segments across all servers, so that each
  // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
  // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
  // returned, deserialized, aggregated, etc.).
  //
  // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
  // latency. This is due to the fact that a query cannot return until it has received a response from each server,
  // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
  // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
  // within any given routing table so that each server in the routing table has approximately the same number of
  // segments to process.
  //
  // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
  // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
  // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
  // server that serves these missing segments until we have complete coverage of all the segments.
  //
  // We then order the segments in ascending number of replicas within our server set, in order to allocate the
  // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
  // end up on a replica with fewer segments.
  //
  // Then, we pick a random replica for each segment, iterating from fewest replicas to most replicas, inversely
  // weighted by the number of segments already assigned to that replica. This ensures that we build a routing table
  // that's as even as possible.
  //
  // The algorithm to generate a routing table is thus:
  // 1. Compute the inverse external view, a mapping of servers to segments
  // 2. For each routing table to generate:
  //    a) Pick TARGET_SERVER_COUNT_PER_QUERY distinct servers
  //    b) Check if the server set covers all the segments; if not, add additional servers until it does.
  //    c) Order the segments in our server set in ascending order of number of replicas present in our server set
  //    d) For each segment, pick a random replica with proper weighting
  //    e) Return that routing table
  //
  // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
  // according to two criteria: the variance in workload per server for any individual table as well as the variance
  // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
  // according to a per-routing table metric and discard the worst routing tables.

  RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
  routingTableGenerator.init(externalView, instanceConfigList);

  PriorityQueue<Pair<Map<String, Set<String>>, Float>> topRoutingTables = new PriorityQueue<>(
      ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, Set<String>>, Float>>() {
        @Override
        public int compare(Pair<Map<String, Set<String>>, Float> left,
            Pair<Map<String, Set<String>>, Float> right) {
          // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative
          // of the comparison
          return -Float.compare(left.getValue(), right.getValue());
        }
      });

  for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
    topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
  }

  // Generate more routing tables and keep the ROUTING_TABLE_COUNT top ones
  for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
    Pair<Map<String, Set<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(routingTableGenerator);
    Pair<Map<String, Set<String>>, Float> worstRoutingTable = topRoutingTables.peek();

    // If the new routing table is better than the worst one, keep it
    if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
      topRoutingTables.poll();
      topRoutingTables.add(newRoutingTable);
    }
  }

  // Return the best routing tables
  List<ServerToSegmentSetMap> routingTables = new ArrayList<>(topRoutingTables.size());
  while (!topRoutingTables.isEmpty()) {
    Pair<Map<String, Set<String>>, Float> routingTableWithMetric = topRoutingTables.poll();
    routingTables.add(new ServerToSegmentSetMap(routingTableWithMetric.getKey()));
  }
  return routingTables;
}
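The add calls here feed a bounded max-heap that keeps the ROUTING_TABLE_COUNT tables with the smallest metric: the worst retained table is always at peek(), and a better candidate evicts it via poll() before being added. A minimal, generic sketch of that "top-K smallest" pattern (the metric values and Candidate-free shape are illustrative, not Pinot code):

import java.util.Comparator;
import java.util.PriorityQueue;

public class TopKSmallest {
    // Keep the k candidates with the smallest metric seen so far.
    public static PriorityQueue<Double> keepKSmallest(double[] metrics, int k) {
        // Max-heap: the worst (largest) retained metric sits at the head
        PriorityQueue<Double> heap = new PriorityQueue<>(k, Comparator.reverseOrder());
        for (double metric : metrics) {
            if (heap.size() < k) {
                heap.add(metric);
            } else if (metric < heap.peek()) {  // better than the current worst
                heap.poll();                    // evict the worst
                heap.add(metric);               // keep the new candidate
            }
        }
        return heap;
    }

    public static void main(String[] args) {
        // Retains 0.2, 1.0 and 3.5 (heap order is unspecified when printed)
        System.out.println(keepKSmallest(new double[] {5.0, 1.0, 3.5, 0.2, 9.9}, 3));
    }
}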
From source file:org.apache.hadoop.hbase.io.hfile.LruBlockCache.java
/**
 * Eviction method.
 */
void evict() {
  // Ensure only one eviction at a time
  if (!evictionLock.tryLock())
    return;

  try {
    evictionInProgress = true;
    long currentSize = this.size.get();
    long bytesToFree = currentSize - minSize();

    if (LOG.isTraceEnabled()) {
      LOG.trace("Block cache LRU eviction started; Attempting to free "
          + StringUtils.byteDesc(bytesToFree) + " of total=" + StringUtils.byteDesc(currentSize));
    }

    if (bytesToFree <= 0)
      return;

    // Instantiate priority buckets
    BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize, singleSize());
    BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize, multiSize());
    BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize, memorySize());

    // Scan entire map putting into appropriate buckets
    for (CachedBlock cachedBlock : map.values()) {
      switch (cachedBlock.getPriority()) {
      case SINGLE: {
        bucketSingle.add(cachedBlock);
        break;
      }
      case MULTI: {
        bucketMulti.add(cachedBlock);
        break;
      }
      case MEMORY: {
        bucketMemory.add(cachedBlock);
        break;
      }
      }
    }

    long bytesFreed = 0;
    if (forceInMemory || memoryFactor > 0.999f) {
      long s = bucketSingle.totalSize();
      long m = bucketMulti.totalSize();
      if (bytesToFree > (s + m)) {
        // this means we need to evict blocks in memory bucket to make room,
        // so the single and multi buckets will be emptied
        bytesFreed = bucketSingle.free(s);
        bytesFreed += bucketMulti.free(m);
        bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
      } else {
        // this means no need to evict block in memory bucket,
        // and we try best to make the ratio between single-bucket and
        // multi-bucket is 1:2
        long bytesRemain = s + m - bytesToFree;
        if (3 * s <= bytesRemain) {
          // single-bucket is small enough that no eviction happens for it
          // hence all eviction goes from multi-bucket
          bytesFreed = bucketMulti.free(bytesToFree);
        } else if (3 * m <= 2 * bytesRemain) {
          // multi-bucket is small enough that no eviction happens for it
          // hence all eviction goes from single-bucket
          bytesFreed = bucketSingle.free(bytesToFree);
        } else {
          // both buckets need to evict some blocks
          bytesFreed = bucketSingle.free(s - bytesRemain / 3);
          if (bytesFreed < bytesToFree) {
            bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
          }
        }
      }
    } else {
      PriorityQueue<BlockBucket> bucketQueue = new PriorityQueue<BlockBucket>(3);

      bucketQueue.add(bucketSingle);
      bucketQueue.add(bucketMulti);
      bucketQueue.add(bucketMemory);

      int remainingBuckets = 3;

      BlockBucket bucket;
      while ((bucket = bucketQueue.poll()) != null) {
        long overflow = bucket.overflow();
        if (overflow > 0) {
          long bucketBytesToFree = Math.min(overflow, (bytesToFree - bytesFreed) / remainingBuckets);
          bytesFreed += bucket.free(bucketBytesToFree);
        }
        remainingBuckets--;
      }
    }

    if (LOG.isTraceEnabled()) {
      long single = bucketSingle.totalSize();
      long multi = bucketMulti.totalSize();
      long memory = bucketMemory.totalSize();
      LOG.trace("Block cache LRU eviction completed; " + "freed=" + StringUtils.byteDesc(bytesFreed) + ", "
          + "total=" + StringUtils.byteDesc(this.size.get()) + ", " + "single=" + StringUtils.byteDesc(single)
          + ", " + "multi=" + StringUtils.byteDesc(multi) + ", " + "memory=" + StringUtils.byteDesc(memory));
    }
  } finally {
    stats.evict();
    evictionInProgress = false;
    evictionLock.unlock();
  }
}
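In the non-forced branch, the three buckets are added to a tiny PriorityQueue and polled in the order defined by BlockBucket's own Comparable implementation, so the remaining byte budget can be split across the buckets that have not been processed yet. A rough, self-contained sketch of that budget-splitting loop; the Bucket class, its quotas, and its ordering (least overflow first) are assumptions standing in for HBase's BlockBucket:

import java.util.PriorityQueue;

// Hypothetical stand-in for HBase's BlockBucket: ordered by how far the bucket is over its quota.
class Bucket implements Comparable<Bucket> {
    final String name;
    long totalSize;
    final long quota;

    Bucket(String name, long totalSize, long quota) {
        this.name = name;
        this.totalSize = totalSize;
        this.quota = quota;
    }

    long overflow() { return totalSize - quota; }

    long free(long bytes) {                 // pretend to evict 'bytes' worth of blocks
        long freed = Math.min(bytes, totalSize);
        totalSize -= freed;
        return freed;
    }

    @Override
    public int compareTo(Bucket other) {    // assumed ordering: smallest overflow polled first
        return Long.compare(this.overflow(), other.overflow());
    }
}

public class BucketEviction {
    public static void main(String[] args) {
        long bytesToFree = 300, bytesFreed = 0;
        PriorityQueue<Bucket> queue = new PriorityQueue<>(3);
        queue.add(new Bucket("single", 400, 350));
        queue.add(new Bucket("multi", 600, 400));
        queue.add(new Bucket("memory", 500, 450));

        int remaining = queue.size();
        Bucket bucket;
        while ((bucket = queue.poll()) != null) {
            long overflow = bucket.overflow();
            if (overflow > 0) {
                // split what is still left to free evenly across the buckets not yet processed
                long toFree = Math.min(overflow, (bytesToFree - bytesFreed) / remaining);
                bytesFreed += bucket.free(toFree);
            }
            remaining--;
        }
        System.out.println("freed " + bytesFreed + " bytes");
    }
}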
From source file:edu.snu.leader.hierarchy.simple.Individual.java
/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
private void findNearestNeighbors(SimulationState simState) {
  _LOG.trace("Entering findNearestNeighbors( simState )");

  // Get the number of nearest neighbors
  _nearestNeighborCount = simState.getNearestNeighborCount();

  // Build a priority queue to sort things for us
  PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

  // Iterate through all the individuals
  Iterator<Individual> indIter = simState.getAllIndividuals().iterator();
  while (indIter.hasNext()) {
    // Get the individual
    Individual ind = indIter.next();

    // If it is us, continue on
    if (_id.equals(ind._id)) {
      continue;
    }

    // Build a neighbor out of it and put it in the queue
    Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
    sortedNeighbors.add(neighbor);
  }

  // Get the "nearest" neighbors
  int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
  for (int i = 0; i < count; i++) {
    _nearestNeighbors.add(sortedNeighbors.poll());
  }

  _LOG.trace("Leaving findNearestNeighbors( simState )");
}
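This works because Neighbor is assumed to be Comparable by distance: add() pushes every candidate into the queue, and poll() then returns the closest remaining one, so the first k polls yield the k nearest neighbors. A compact sketch of the same idea with a hypothetical Neighbor record:

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class NearestNeighborsDemo {
    // Hypothetical stand-in for the example's Neighbor class: ordered by ascending distance
    record Neighbor(float distance, String id) implements Comparable<Neighbor> {
        @Override
        public int compareTo(Neighbor other) {
            return Float.compare(this.distance, other.distance);
        }
    }

    static List<Neighbor> kNearest(List<Neighbor> all, int k) {
        PriorityQueue<Neighbor> sorted = new PriorityQueue<>();
        sorted.addAll(all);                         // addAll() delegates to add() for each element
        List<Neighbor> nearest = new ArrayList<>();
        int count = Math.min(sorted.size(), k);
        for (int i = 0; i < count; i++) {
            nearest.add(sorted.poll());             // poll() returns the closest remaining neighbor
        }
        return nearest;
    }

    public static void main(String[] args) {
        List<Neighbor> all = List.of(new Neighbor(3.2f, "a"), new Neighbor(0.7f, "b"), new Neighbor(1.9f, "c"));
        System.out.println(kNearest(all, 2));       // prints the two closest neighbors: b (0.7), then c (1.9)
    }
}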
From source file:com.linkedin.pinot.broker.routing.builder.GeneratorBasedRoutingTableBuilder.java
@Override
public void computeRoutingTableFromExternalView(String tableName, ExternalView externalView,
    List<InstanceConfig> instanceConfigs) {
  // The default routing table algorithm tries to balance all available segments across all servers, so that each
  // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
  // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
  // returned, deserialized, aggregated, etc.).
  //
  // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
  // latency. This is due to the fact that a query cannot return until it has received a response from each server,
  // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
  // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
  // within any given routing table so that each server in the routing table has approximately the same number of
  // segments to process.
  //
  // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
  // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
  // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
  // server that serves these missing segments until we have complete coverage of all the segments.
  //
  // We then order the segments in ascending number of replicas within our server set, in order to allocate the
  // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
  // end up on a server with fewer segments.
  //
  // Then, we pick a server with least segments already assigned for each segment. This ensures that we build a
  // routing table that's as even as possible.
  //
  // The algorithm to generate a routing table is thus:
  // 1. Compute the inverse external view, a mapping of servers to segments
  // 2. For each routing table to generate:
  //    a) Pick _targetNumServersPerQuery distinct servers
  //    b) Check if the server set covers all the segments; if not, add additional servers until it does
  //    c) Order the segments in our server set in ascending order of number of replicas present in our server set
  //    d) For each segment, pick a server with least segments already assigned
  //    e) Return that routing table
  //
  // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
  // according to two criteria: the variance in workload per server for any individual table as well as the variance
  // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
  // according to a per-routing table metric and discard the worst routing tables.

  RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
  routingTableGenerator.init(externalView, instanceConfigs);

  PriorityQueue<Pair<Map<String, List<String>>, Float>> topRoutingTables = new PriorityQueue<>(
      ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, List<String>>, Float>>() {
        @Override
        public int compare(Pair<Map<String, List<String>>, Float> left,
            Pair<Map<String, List<String>>, Float> right) {
          // Float.compare sorts in ascending order and we want a max heap, so we need to return the negative
          // of the comparison
          return -Float.compare(left.getValue(), right.getValue());
        }
      });

  for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
    topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
  }

  // Generate more routing tables and keep the ROUTING_TABLE_COUNT top ones
  for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
    Pair<Map<String, List<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(routingTableGenerator);
    Pair<Map<String, List<String>>, Float> worstRoutingTable = topRoutingTables.peek();

    // If the new routing table is better than the worst one, keep it
    if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
      topRoutingTables.poll();
      topRoutingTables.add(newRoutingTable);
    }
  }

  // Return the best routing tables
  List<Map<String, List<String>>> routingTables = new ArrayList<>(topRoutingTables.size());
  while (!topRoutingTables.isEmpty()) {
    routingTables.add(topRoutingTables.poll().getKey());
  }
  setRoutingTables(routingTables);
}
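This newer variant uses the same bounded max-heap as the previous example; the anonymous Comparator that negates Float.compare can also be expressed with Comparator.comparingDouble(...).reversed(). A small sketch of that alternative, using a hypothetical ScoredTable record in place of the Pair type (the table names and metric values are made up):

import java.util.Comparator;
import java.util.PriorityQueue;

public class MaxHeapComparatorDemo {
    // Hypothetical stand-in for the Pair<Map<String, List<String>>, Float> used in the example
    record ScoredTable(String name, float metric) {}

    public static void main(String[] args) {
        // Equivalent to the anonymous Comparator returning -Float.compare(...):
        // a max-heap on the metric, built with a reversed key comparator
        PriorityQueue<ScoredTable> topTables =
                new PriorityQueue<>(Comparator.comparingDouble(ScoredTable::metric).reversed());

        topTables.add(new ScoredTable("tableA", 0.42f));
        topTables.add(new ScoredTable("tableB", 0.17f));
        topTables.add(new ScoredTable("tableC", 0.88f));

        // peek() now exposes the worst (largest-metric) retained table, which is exactly
        // what the replace-the-worst loop in the example relies on
        System.out.println(topTables.peek()); // ScoredTable[name=tableC, metric=0.88]
    }
}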
From source file:com.joliciel.talismane.posTagger.PosTaggerImpl.java
@Override
public List<PosTagSequence> tagSentence(List<TokenSequence> tokenSequences) {
  MONITOR.startTask("tagSentence");
  try {
    MONITOR.startTask("apply filters");
    try {
      for (TokenSequence tokenSequence : tokenSequences) {
        for (TokenSequenceFilter tokenFilter : this.preProcessingFilters) {
          tokenFilter.apply(tokenSequence);
        }
      }
    } finally {
      MONITOR.endTask("apply filters");
    }

    int sentenceLength = tokenSequences.get(0).getText().length();

    TreeMap<Double, PriorityQueue<PosTagSequence>> heaps = new TreeMap<Double, PriorityQueue<PosTagSequence>>();

    PriorityQueue<PosTagSequence> heap0 = new PriorityQueue<PosTagSequence>();
    for (TokenSequence tokenSequence : tokenSequences) {
      // add an empty PosTagSequence for each token sequence
      PosTagSequence emptySequence = this.getPosTaggerService().getPosTagSequence(tokenSequence, 0);
      emptySequence.setScoringStrategy(decisionMaker.getDefaultScoringStrategy());
      heap0.add(emptySequence);
    }
    heaps.put(0.0, heap0);

    PriorityQueue<PosTagSequence> finalHeap = null;
    while (heaps.size() > 0) {
      Entry<Double, PriorityQueue<PosTagSequence>> heapEntry = heaps.pollFirstEntry();
      if (LOG.isTraceEnabled()) {
        LOG.trace("heap key: " + heapEntry.getKey() + ", sentence length: " + sentenceLength);
      }
      if (heapEntry.getKey() == sentenceLength) {
        finalHeap = heapEntry.getValue();
        break;
      }
      PriorityQueue<PosTagSequence> previousHeap = heapEntry.getValue();

      // limit the breadth to K
      int maxSequences = previousHeap.size() > this.beamWidth ? this.beamWidth : previousHeap.size();

      for (int j = 0; j < maxSequences; j++) {
        PosTagSequence history = previousHeap.poll();
        Token token = history.getNextToken();
        if (LOG.isTraceEnabled()) {
          LOG.trace("#### Next history ( " + heapEntry.getKey() + "): " + history.toString());
          LOG.trace("Prob: " + df.format(history.getScore()));
          LOG.trace("Token: " + token.getText());

          StringBuilder sb = new StringBuilder();
          for (Token oneToken : history.getTokenSequence().listWithWhiteSpace()) {
            if (oneToken.equals(token))
              sb.append("[" + oneToken + "]");
            else
              sb.append(oneToken);
          }
          LOG.trace(sb.toString());
        }

        PosTaggerContext context = this.getPosTaggerFeatureService().getContext(token, history);
        List<Decision<PosTag>> decisions = new ArrayList<Decision<PosTag>>();

        // test the positive rules on the current token
        boolean ruleApplied = false;
        if (posTaggerPositiveRules != null) {
          MONITOR.startTask("check rules");
          try {
            for (PosTaggerRule rule : posTaggerPositiveRules) {
              if (LOG.isTraceEnabled()) {
                LOG.trace("Checking rule: " + rule.getCondition().getName());
              }
              RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
              FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
              if (ruleResult != null && ruleResult.getOutcome()) {
                Decision<PosTag> positiveRuleDecision = TalismaneSession.getPosTagSet()
                    .createDefaultDecision(rule.getTag());
                decisions.add(positiveRuleDecision);
                positiveRuleDecision.addAuthority(rule.getCondition().getName());
                ruleApplied = true;
                if (LOG.isTraceEnabled()) {
                  LOG.trace("Rule applies. Setting posTag to: " + rule.getTag().getCode());
                }
                break;
              }
            }
          } finally {
            MONITOR.endTask("check rules");
          }
        }

        if (!ruleApplied) {
          // test the features on the current token
          List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
          MONITOR.startTask("analyse features");
          try {
            for (PosTaggerFeature<?> posTaggerFeature : posTaggerFeatures) {
              MONITOR.startTask(posTaggerFeature.getCollectionName());
              try {
                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                FeatureResult<?> featureResult = posTaggerFeature.check(context, env);
                if (featureResult != null)
                  featureResults.add(featureResult);
              } finally {
                MONITOR.endTask(posTaggerFeature.getCollectionName());
              }
            }
            if (LOG.isTraceEnabled()) {
              for (FeatureResult<?> result : featureResults) {
                LOG.trace(result.toString());
              }
            }
          } finally {
            MONITOR.endTask("analyse features");
          }

          // evaluate the feature results using the maxent model
          MONITOR.startTask("make decision");
          decisions = this.decisionMaker.decide(featureResults);
          MONITOR.endTask("make decision");

          for (ClassificationObserver<PosTag> observer : this.observers) {
            observer.onAnalyse(token, featureResults, decisions);
          }

          // apply the negative rules
          Set<PosTag> eliminatedPosTags = new TreeSet<PosTag>();
          if (posTaggerNegativeRules != null) {
            MONITOR.startTask("check negative rules");
            try {
              for (PosTaggerRule rule : posTaggerNegativeRules) {
                if (LOG.isTraceEnabled()) {
                  LOG.trace("Checking negative rule: " + rule.getCondition().getName());
                }
                RuntimeEnvironment env = this.featureService.getRuntimeEnvironment();
                FeatureResult<Boolean> ruleResult = rule.getCondition().check(context, env);
                if (ruleResult != null && ruleResult.getOutcome()) {
                  eliminatedPosTags.add(rule.getTag());
                  if (LOG.isTraceEnabled()) {
                    LOG.trace("Rule applies. Eliminating posTag: " + rule.getTag().getCode());
                  }
                }
              }
              if (eliminatedPosTags.size() > 0) {
                List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();
                for (Decision<PosTag> decision : decisions) {
                  if (!eliminatedPosTags.contains(decision.getOutcome())) {
                    decisionShortList.add(decision);
                  } else {
                    LOG.trace("Eliminating decision: " + decision.toString());
                  }
                }
                if (decisionShortList.size() > 0) {
                  decisions = decisionShortList;
                } else {
                  LOG.debug("All decisions eliminated! Restoring original decisions.");
                }
              }
            } finally {
              MONITOR.endTask("check negative rules");
            }
          }

          // is this a known word in the lexicon?
          MONITOR.startTask("apply constraints");
          try {
            if (LOG.isTraceEnabled()) {
              String posTags = "";
              for (PosTag onePosTag : token.getPossiblePosTags()) {
                posTags += onePosTag.getCode() + ",";
              }
              LOG.trace("Token: " + token.getText() + ". PosTags: " + posTags);
            }

            List<Decision<PosTag>> decisionShortList = new ArrayList<Decision<PosTag>>();

            for (Decision<PosTag> decision : decisions) {
              if (decision.getProbability() >= MIN_PROB_TO_STORE) {
                decisionShortList.add(decision);
              }
            }
            if (decisionShortList.size() > 0) {
              decisions = decisionShortList;
            }
          } finally {
            MONITOR.endTask("apply constraints");
          }
        } // has a rule been applied?

        // add new TaggedTokenSequences to the heap, one for each outcome provided by MaxEnt
        MONITOR.startTask("heap sort");
        for (Decision<PosTag> decision : decisions) {
          if (LOG.isTraceEnabled())
            LOG.trace("Outcome: " + decision.getOutcome() + ", " + decision.getProbability());

          PosTaggedToken posTaggedToken = this.getPosTaggerService().getPosTaggedToken(token, decision);
          PosTagSequence sequence = this.getPosTaggerService().getPosTagSequence(history);
          sequence.addPosTaggedToken(posTaggedToken);
          if (decision.isStatistical())
            sequence.addDecision(decision);

          double heapIndex = token.getEndIndex();
          // add another half for an empty token, to differentiate it from regular ones
          if (token.getStartIndex() == token.getEndIndex())
            heapIndex += 0.5;

          // if it's the last token, make sure we end
          if (token.getIndex() == sequence.getTokenSequence().size() - 1)
            heapIndex = sentenceLength;

          if (LOG.isTraceEnabled())
            LOG.trace("Heap index: " + heapIndex);

          PriorityQueue<PosTagSequence> heap = heaps.get(heapIndex);
          if (heap == null) {
            heap = new PriorityQueue<PosTagSequence>();
            heaps.put(heapIndex, heap);
          }
          heap.add(sequence);
        } // next outcome for this token
        MONITOR.endTask("heap sort");
      } // next history
    } // next atomic index

    // return the best sequence on the heap
    List<PosTagSequence> sequences = new ArrayList<PosTagSequence>();
    int i = 0;
    while (!finalHeap.isEmpty()) {
      sequences.add(finalHeap.poll());
      i++;
      if (i >= this.getBeamWidth())
        break;
    }

    // apply post-processing filters
    LOG.debug("####Final postag sequences:");
    int j = 1;
    for (PosTagSequence sequence : sequences) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Sequence " + (j++) + ", score=" + df.format(sequence.getScore()));
        LOG.debug("Sequence before filters: " + sequence);
      }
      for (PosTagSequenceFilter filter : this.postProcessingFilters)
        filter.apply(sequence);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Sequence after filters: " + sequence);
      }
    }

    return sequences;
  } finally {
    MONITOR.endTask("tagSentence");
  }
}
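The tagger keeps one PriorityQueue of partial sequences per character position in a TreeMap, polls the lowest-position heap, expands only its top beamWidth sequences, and adds each extended sequence to the heap for its new end position. A stripped-down sketch of that "heaps keyed by position" bookkeeping, with a hypothetical Hypothesis type and toy scores (not Talismane code):

import java.util.Map;
import java.util.PriorityQueue;
import java.util.TreeMap;

public class BeamByPositionDemo {
    // Hypothetical partial hypothesis: ordered so the best (highest-score) sequence is polled first
    record Hypothesis(String tags, int endIndex, double score) implements Comparable<Hypothesis> {
        @Override
        public int compareTo(Hypothesis other) {
            return Double.compare(other.score, this.score); // descending score
        }
    }

    public static void main(String[] args) {
        int sentenceLength = 3;
        int beamWidth = 2;

        TreeMap<Integer, PriorityQueue<Hypothesis>> heaps = new TreeMap<>();
        PriorityQueue<Hypothesis> heap0 = new PriorityQueue<>();
        heap0.add(new Hypothesis("", 0, 1.0));
        heaps.put(0, heap0);

        PriorityQueue<Hypothesis> finalHeap = null;
        while (!heaps.isEmpty()) {
            Map.Entry<Integer, PriorityQueue<Hypothesis>> entry = heaps.pollFirstEntry();
            if (entry.getKey() == sentenceLength) {
                finalHeap = entry.getValue();
                break;
            }
            PriorityQueue<Hypothesis> previous = entry.getValue();
            int expand = Math.min(previous.size(), beamWidth);   // limit the breadth to beamWidth
            for (int i = 0; i < expand; i++) {
                Hypothesis h = previous.poll();
                // Pretend each position can be tagged N or V, each halving the score
                for (String tag : new String[] { "N", "V" }) {
                    Hypothesis extended = new Hypothesis(h.tags() + tag, h.endIndex() + 1, h.score() * 0.5);
                    heaps.computeIfAbsent(extended.endIndex(), k -> new PriorityQueue<>()).add(extended);
                }
            }
        }
        System.out.println(finalHeap.poll()); // best complete tag sequence under this toy model
    }
}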
From source file:edu.utsa.sifter.som.MainSOM.java
void initTerms() throws IOException {
  final Terms terms = MultiFields.getTerms(Reader, "body");

  System.out.println("number of terms in index: " + terms.size());
  final PriorityQueue<TermPair> topTerms = new PriorityQueue<TermPair>(Conf.MAX_VECTOR_FEATURES,
      new TermPair.TermPairComparator());

  int num = 0;
  TermsEnum term = terms.iterator(null);
  while (term.next() != null) {
    final int count = term.docFreq();
    final double r = ((double) count) / Reader.numDocs();

    if (Conf.DOC_FREQ_THRESHOLD_LOW <= r && r <= Conf.DOC_FREQ_THRESHOLD_HIGH) {
      final String s = term.term().utf8ToString();
      if (s.length() >= Conf.MIN_SOM_TERM_LENGTH) {
        if (topTerms.size() < Conf.MAX_VECTOR_FEATURES) {
          topTerms.add(new TermPair(s, count));
        } else if (topTerms.peek().DocCount < count) {
          topTerms.remove();
          topTerms.add(new TermPair(s, count));
        }
        ++num;
      }
    }
  }
  System.out.println(num + " terms with in doc frequency range");

  final int numFeatures = Math.min(topTerms.size(), Conf.MAX_VECTOR_FEATURES);
  TermIndices = new HashMap<String, Integer>((numFeatures * 4 + 1) / 3); // respect load factor
  Terms = new java.util.Vector<String>(numFeatures);
  Terms.setSize(numFeatures);
  System.out.println("the top " + numFeatures + " features will be used");

  for (int i = numFeatures - 1; i > -1; --i) { // reverse order, to put top terms first
    TermPair t = topTerms.poll(); // least remaining
    TermIndices.put(t.Term, i);
    Terms.set(i, t.Term);
    // System.out.println("Including term " + t.Term + " (" + t.DocCount + ")");
  }
}
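This is the classic bounded min-heap pattern: add until the queue holds MAX_VECTOR_FEATURES entries, then replace the head (the lowest document count) whenever a more frequent term arrives. Because poll() afterwards returns terms from least to most frequent, the final loop fills the index array from the back. A compact sketch with a hypothetical term/count pair:

import java.util.Comparator;
import java.util.PriorityQueue;

public class TopTermsDemo {
    record TermCount(String term, int docCount) {}

    static String[] topTerms(TermCount[] candidates, int max) {
        // Min-heap on docCount: the head is always the weakest term currently retained
        PriorityQueue<TermCount> top = new PriorityQueue<>(max, Comparator.comparingInt(TermCount::docCount));
        for (TermCount tc : candidates) {
            if (top.size() < max) {
                top.add(tc);
            } else if (top.peek().docCount() < tc.docCount()) {
                top.remove();   // drop the weakest ...
                top.add(tc);    // ... and keep the stronger newcomer
            }
        }
        // poll() yields least frequent first, so fill the result from the back
        String[] result = new String[top.size()];
        for (int i = result.length - 1; i >= 0; i--) {
            result[i] = top.poll().term();
        }
        return result;
    }

    public static void main(String[] args) {
        TermCount[] cands = { new TermCount("alpha", 12), new TermCount("beta", 97),
                new TermCount("gamma", 45), new TermCount("delta", 3) };
        System.out.println(String.join(", ", topTerms(cands, 2))); // beta, gamma
    }
}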
From source file:delfos.group.grs.consensus.ConsensusGRS.java
public File getConsensusOutputXMLwithDesiredConsensusDegree(File consensusInputXML, double consensusDegree) {
  File consensusOutputDirectory = (File) getParameterValue(CONSENSUS_OUTPUT_FILES_DIRECTORY);

  String consensusInputXMLFileNameNoExtension = consensusInputXML.getName().substring(0,
      consensusInputXML.getName().lastIndexOf("."));
  String consensusInputXMLInOutputDirectoryAbsolutePath = consensusOutputDirectory.getAbsolutePath()
      + File.separator + consensusInputXMLFileNameNoExtension;

  File consensusInputXMLInOutputDirectory = new File(consensusInputXMLInOutputDirectoryAbsolutePath);

  if (!consensusInputXML.exists()) {
    Global.showWarning("The input XML '" + consensusInputXMLInOutputDirectory
        + "' does not exists in the output directory");
    return null;
  }

  if (!consensusOutputDirectory.exists()) {
    Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' not exists");
    return null;
  }

  if (!consensusOutputDirectory.isDirectory()) {
    Global.showWarning("'" + consensusOutputDirectory.getAbsolutePath() + "' is not a directory");
    return null;
  }

  List<File> childrenFiles = new ArrayList<>(Arrays.asList(consensusOutputDirectory.listFiles()));
  PriorityQueue<PriorityItem<File>> queue = new PriorityQueue<>(Collections.reverseOrder());

  for (File consensusOutputFile : childrenFiles) {
    final String outputFileNameNoExtension = consensusOutputFile.getName().substring(0,
        consensusOutputFile.getName().lastIndexOf("."));
    if (outputFileNameNoExtension.startsWith(consensusInputXMLFileNameNoExtension)
        && outputFileNameNoExtension.contains("Consenso")) {
      try {
        Global.showln(consensusOutputFile.getAbsolutePath());
        double thisFileConsensusDegree = ConsensusOfIndividualRecommendationsToXML
            .readConsensusOutputXML(consensusOutputFile).consensusDegree;

        queue.add(new PriorityItem<>(consensusOutputFile, thisFileConsensusDegree));
      } catch (JDOMException | IOException ex) {
        Global.showWarning(ex);
      }
    }
  }

  if (queue.isEmpty()) {
    return null;
  }

  if (Global.isVerboseAnnoying()) {
    Global.showInfoMessage("Found " + queue.size() + " consensus files");
  }

  while (!queue.isEmpty()) {
    PriorityItem<File> priorityItem = queue.poll();
    double consensusDegreeThisFile = priorityItem.getPriority();

    if (consensusDegreeThisFile >= consensusDegree) {
      return priorityItem.getKey();
    }
  }

  throw new IllegalStateException(
      "Consensus degree not reached for '" + consensusInputXMLFileNameNoExtension + "'");
}
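Here the queue is constructed with Collections.reverseOrder(), so add() keeps the item with the highest consensus degree at the head and the final loop polls files from most to least consensual. A small sketch of the same pattern; PriorityItem is shown as a hypothetical record that is Comparable by its priority (the real delfos class has a different shape):

import java.util.Collections;
import java.util.PriorityQueue;

public class ReverseOrderQueueDemo {
    // Hypothetical stand-in for the example's PriorityItem<File>
    record PriorityItem(String key, double priority) implements Comparable<PriorityItem> {
        @Override
        public int compareTo(PriorityItem other) {
            return Double.compare(this.priority, other.priority); // natural order: ascending priority
        }
    }

    public static void main(String[] args) {
        // reverseOrder() flips the natural ordering, so the head is the item with the HIGHEST priority
        PriorityQueue<PriorityItem> queue = new PriorityQueue<>(Collections.reverseOrder());
        queue.add(new PriorityItem("consensus-1.xml", 0.62));
        queue.add(new PriorityItem("consensus-2.xml", 0.91));
        queue.add(new PriorityItem("consensus-3.xml", 0.75));

        double desiredDegree = 0.7;
        while (!queue.isEmpty()) {
            PriorityItem item = queue.poll();          // most consensual file first
            if (item.priority() >= desiredDegree) {
                System.out.println("picked " + item.key()); // picked consensus-2.xml
                break;
            }
        }
    }
}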
From source file:com.joliciel.jochre.lexicon.MostLikelyWordChooserImpl.java
public LetterSequence chooseMostLikelyWord(List<LetterSequence> heap, List<LetterSequence> holdoverHeap, int n) {
  LetterSequence bestSequence = null;

  List<LetterSequence> holdoverWithDash = new ArrayList<LetterSequence>(n);
  List<LetterSequence> holdoverWithoutDash = new ArrayList<LetterSequence>(n);

  int i = 0;
  for (LetterSequence holdoverSequence : holdoverHeap) {
    if (i >= n)
      break;
    if (holdoverSequence.toString().endsWith("-"))
      holdoverWithDash.add(holdoverSequence);
    else
      holdoverWithoutDash.add(holdoverSequence);
    i++;
  }

  PriorityQueue<LetterSequence> combinedHeap = new PriorityQueue<LetterSequence>();
  for (LetterSequence sequenceWithDash : holdoverWithDash) {
    // find the dash that needs to be skipped at the end of sequence 1
    for (int j = sequenceWithDash.size() - 1; j >= 0; j--) {
      Letter outcome = sequenceWithDash.get(j);
      if (outcome.getString().equals("-")) {
        sequenceWithDash.setDashToSkip(j);
        break;
      }
    }
    for (LetterSequence letterSequence : heap) {
      LetterSequence combinedSequence = this.getLetterGuesserService().getLetterSequence(sequenceWithDash,
          letterSequence);
      combinedHeap.add(combinedSequence);
    }
  }

  List<LetterSequence> combinedSequences = new ArrayList<LetterSequence>();
  for (i = 0; i < n; i++) {
    if (combinedHeap.isEmpty())
      break;
    combinedSequences.add(combinedHeap.poll());
  }

  if (holdoverWithoutDash.size() == 0) {
    // all holdovers end with a dash
    // therefore we must combine the two sequences
    bestSequence = this.chooseMostLikelyWord(combinedSequences, n);
  } else {
    // some holdovers end with a dash, others don't
    // need to compare combined sequences with individual sequences
    LetterSequence bestCombinedSequence = this.chooseMostLikelyWord(combinedSequences, n);

    // Originally we only included sequences without dashes here
    // However, this falsifies the results towards those without a dash
    // especially in the case where sequence 1 or sequence 2 is also a common word (e.g. der in Yiddish)
    // PriorityQueue<LetterSequence> holdoverHeapWithoutDash = new PriorityQueue<LetterSequence>(holdoverWithoutDash);
    // LetterSequence bestHoldoverSequenceWithoutDash = this.chooseMostLikelyWord(holdoverHeapWithoutDash, n);
    // Changed it to the following:
    LetterSequence bestHoldoverSequence = this.chooseMostLikelyWord(holdoverHeap, n);
    LetterSequence bestNextRowSequence = this.chooseMostLikelyWord(heap, n);

    if (LOG.isDebugEnabled()) {
      LOG.debug("Best combined: " + bestCombinedSequence.toString() + ". Adjusted score: "
          + bestCombinedSequence.getAdjustedScore());
      LOG.debug("Best seq1 separate: " + bestHoldoverSequence.toString() + ". Adjusted score: "
          + bestHoldoverSequence.getAdjustedScore());
      LOG.debug("Best seq2 separate: " + bestNextRowSequence.toString() + ". Adjusted score: "
          + bestNextRowSequence.getAdjustedScore());
    }

    // Now, to compare the best combined with the best separate scores, we need to get a geometric mean of the shapes
    // in the best separate ones, and adjust for the lowest frequency word
    LetterSequence separateSequence = this.letterGuesserService.getLetterSequence(bestHoldoverSequence,
        bestNextRowSequence);
    int minFrequency = bestHoldoverSequence.getFrequency() < bestNextRowSequence.getFrequency()
        ? bestHoldoverSequence.getFrequency()
        : bestNextRowSequence.getFrequency();
    double freqLog = this.getFrequencyAdjustment(minFrequency);
    double separateAdjustedScore = separateSequence.getScore() * freqLog + additiveSmoothing;
    separateSequence.setAdjustedScore(separateAdjustedScore);
    if (LOG.isDebugEnabled())
      LOG.debug("Best separate: " + separateSequence.toString() + ". Score: " + separateSequence.getScore()
          + ". Freq: " + minFrequency + ". Adjusted: " + freqLog + ". Adjusted score: "
          + separateSequence.getAdjustedScore());

    if (bestCombinedSequence.getAdjustedScore() > separateAdjustedScore) {
      if (LOG.isDebugEnabled())
        LOG.debug("Using combined sequence");
      bestSequence = bestCombinedSequence;
    } else {
      if (LOG.isDebugEnabled())
        LOG.debug("Using separate sequences");
      bestSequence = this.getLetterGuesserService().getLetterSequence(bestHoldoverSequence, bestNextRowSequence);
    }
    if (LOG.isDebugEnabled())
      LOG.debug("Best with holdover: " + bestSequence.toString());
  }

  return bestSequence;
}
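The combined heap relies on LetterSequence having a natural ordering that puts the best guess first, so adding each pairwise combination and then polling n times yields the n best hyphen-joined candidates. A reduced sketch of that "combine pairwise, poll the best" step, with a hypothetical Candidate type ordered by descending score and a toy joining/scoring rule (none of this is Jochre's actual scoring):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class CombineAndPollBestDemo {
    // Hypothetical candidate ordered by descending score, as LetterSequence is assumed to be in the example
    record Candidate(String text, double score) implements Comparable<Candidate> {
        @Override
        public int compareTo(Candidate other) {
            return Double.compare(other.score, this.score);
        }
    }

    static List<Candidate> nBestCombinations(List<Candidate> endsWithDash, List<Candidate> nextRow, int n) {
        PriorityQueue<Candidate> combinedHeap = new PriorityQueue<>();
        for (Candidate left : endsWithDash) {
            for (Candidate right : nextRow) {
                // join the hyphenated half-word with its continuation; multiply the scores
                combinedHeap.add(new Candidate(left.text().replaceAll("-$", "") + right.text(),
                        left.score() * right.score()));
            }
        }
        List<Candidate> best = new ArrayList<>();
        for (int i = 0; i < n && !combinedHeap.isEmpty(); i++) {
            best.add(combinedHeap.poll());
        }
        return best;
    }

    public static void main(String[] args) {
        List<Candidate> holdover = List.of(new Candidate("hold-", 0.8), new Candidate("bold-", 0.3));
        List<Candidate> nextRow = List.of(new Candidate("over", 0.9), new Candidate("aver", 0.2));
        System.out.println(nBestCombinations(holdover, nextRow, 2)); // holdover (0.72), then boldover (0.27)
    }
}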
From source file:io.warp10.script.functions.OPTDTW.java
@Override
public Object apply(WarpScriptStack stack) throws WarpScriptException {
  Object o = stack.pop();

  if (!(o instanceof Number)) {
    throw new WarpScriptException(getName() + " expects a count of best results to return on top of the stack.");
  }

  int count = ((Number) o).intValue();

  o = stack.pop();

  if (!(o instanceof List)) {
    throw new WarpScriptException(getName() + " expects a numeric list to use as query below the count.");
  }

  double[] query = new double[((List) o).size()];
  int i = 0;
  for (Object oo : (List) o) {
    query[i++] = ((Number) oo).doubleValue();
  }

  // Z-Normalize query
  double[] musigma = DoubleUtils.musigma(query, true);
  for (i = 0; i < query.length; i++) {
    query[i] = (query[i] - musigma[0]) / musigma[1];
  }

  o = stack.pop();

  if (!(o instanceof List)) {
    throw new WarpScriptException(getName()
        + " expects a numeric list as the sequence in which to find best matches below the 'query' list.");
  }

  double[] sequence = new double[((List) o).size()];
  i = 0;
  for (Object oo : (List) o) {
    sequence[i++] = ((Number) oo).doubleValue();
  }

  if (sequence.length <= query.length) {
    throw new WarpScriptException(getName() + " expects the query list to be shorter than the sequence list.");
  }

  double mindist = 0.0;

  PriorityQueue<Pair<Integer, Double>> distances = new PriorityQueue<Pair<Integer, Double>>(
      new Comparator<Pair<Integer, Double>>() {
        @Override
        public int compare(Pair<Integer, Double> o1, Pair<Integer, Double> o2) {
          return o1.getValue().compareTo(o2.getValue());
        }
      });

  double[] subsequence = new double[query.length];

  for (i = 0; i <= sequence.length - query.length; i++) {
    System.arraycopy(sequence, i, subsequence, 0, query.length);
    // Z-Normalize the subsequence
    musigma = DoubleUtils.musigma(subsequence, true);
    for (int j = 0; j < subsequence.length; j++) {
      subsequence[j] = (subsequence[j] - musigma[0]) / musigma[1];
    }
    double dist = dtw.compute(query, 0, query.length, subsequence, 0, query.length, mindist);

    if (dist < 0) {
      continue;
    }

    distances.add(new Pair<Integer, Double>(i, dist));

    //
    // If the priority queue is of 'count' size, retrieve the largest distance and
    // use it as the threshold for the DTW computation
    //

    if (count > 0 && distances.size() >= count) {
      Object adist[] = distances.toArray();
      mindist = ((Pair<Integer, Double>) adist[count - 1]).getValue();
    }
  }

  List<List<Object>> results = new ArrayList<List<Object>>();

  while (!distances.isEmpty()) {
    Pair<Integer, Double> entry = distances.poll();

    List<Object> result = new ArrayList<Object>();
    result.add(entry.getKey());
    result.add(entry.getValue());
    results.add(result);

    if (count > 0 && count == results.size()) {
      break;
    }
  }

  stack.push(results);

  return stack;
}
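One caveat worth noting: PriorityQueue.toArray() returns the elements in no particular order (per its Javadoc), so reading adist[count - 1] as "the largest of the best count distances" is only an approximation of the intended pruning threshold. A bounded max-heap makes that threshold available directly from peek(). The sketch below keeps only the best 'count' matches this way; the Match record and the canned distances stand in for the real DTW computation:

import java.util.Comparator;
import java.util.PriorityQueue;

public class MatchThresholdDemo {
    record Match(int offset, double distance) {}

    public static void main(String[] args) {
        int count = 3; // number of best matches to keep
        // Max-heap on distance: peek() is the worst match currently retained,
        // which can serve as the pruning threshold for the next DTW computation
        PriorityQueue<Match> best =
                new PriorityQueue<>(Comparator.comparingDouble(Match::distance).reversed());

        double[] candidateDistances = { 4.2, 1.1, 3.7, 0.9, 5.5, 2.0 };
        double threshold = Double.POSITIVE_INFINITY;
        for (int offset = 0; offset < candidateDistances.length; offset++) {
            double dist = candidateDistances[offset]; // stand-in for dtw.compute(...)
            if (dist >= threshold) {
                continue;                             // would be pruned by the real DTW
            }
            best.add(new Match(offset, dist));
            if (best.size() > count) {
                best.poll();                          // drop the worst, keep only 'count' matches
            }
            if (best.size() == count) {
                threshold = best.peek().distance();   // tightest bound seen so far
            }
        }
        System.out.println(best); // the 3 closest matches (heap iteration order is unspecified)
    }
}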
From source file:com.addthis.hydra.data.io.DiskBackedList2.java
/**
 * Sort the collection of elements using a standard external sort algorithm: sort each chunk of elements, then
 * merge the chunks into a new list, then switch to the new list.
 */
public void sort(final Comparator<? super K> comp) {
  try {
    // Sort each chunk. Done if there is only one chunk.
    sortEachChunk(comp);
    if (chunks.size() <= 1) {
      return;
    }
    Comparator<Pair<K, Integer>> pairComp = new Comparator<Pair<K, Integer>>() {
      @Override
      public int compare(Pair<K, Integer> e1, Pair<K, Integer> e2) {
        return comp.compare(e1.getLeft(), e2.getLeft());
      }
    };
    // This heap stores the lowest remaining value from each chunk
    PriorityQueue<Pair<K, Integer>> heap = new PriorityQueue<>(chunks.size(), pairComp);
    ArrayList<Iterator> iterators = new ArrayList<>(chunks.size());

    // Initialize the heap with one value per chunk
    close();

    for (int i = 0; i < chunks.size(); i++) {
      Iterator<K> it = chunks.get(i).getChunkIterator();
      iterators.add(i, it);
      if (it.hasNext()) {
        K elt = it.next();
        if (elt != null) {
          heap.add(Pair.of(elt, i));
        }
      }
    }

    // Make a new disk backed list to store sorted values.
    // When the number of chunks is large, the size of the output buffer needs to shrink to make up for the
    // extra mem usage
    long storageMaxChunkSize = maxChunkSizeBytes / (1 + chunks.size() / 20);
    DiskBackedList2<K> storage = new DiskBackedList2<>(codec, storageMaxChunkSize, directory);

    // Repeatedly pull the smallest element from the heap
    while (!heap.isEmpty()) {
      Pair<K, Integer> leastElt = heap.poll();
      storage.add(leastElt.getLeft());
      @SuppressWarnings({ "unchecked" })
      Iterator<K> polledIterator = iterators.get(leastElt.getRight());
      if (polledIterator.hasNext()) {
        heap.add(Pair.of(polledIterator.next(), leastElt.getRight()));
      }
    }

    // Switch to the storage dbl's chunks
    storage.close();
    chunks = storage.getChunks();
    currentChunk = null;
  } catch (IOException io) {
    throw Throwables.propagate(io);
  }
}
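The heap here drives a k-way merge: it is seeded with one (value, chunkIndex) pair per sorted chunk, the smallest pair is polled, and the next element from that same chunk is added back. A self-contained sketch of the merge step over in-memory sorted lists; the HeapEntry record replaces the commons-lang Pair used in the example:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeDemo {
    record HeapEntry(int value, int sourceIndex) {}

    static List<Integer> merge(List<List<Integer>> sortedChunks) {
        PriorityQueue<HeapEntry> heap =
                new PriorityQueue<>(Math.max(1, sortedChunks.size()), Comparator.comparingInt(HeapEntry::value));
        List<Iterator<Integer>> iterators = new ArrayList<>();

        // Seed the heap with the first element of each chunk
        for (int i = 0; i < sortedChunks.size(); i++) {
            Iterator<Integer> it = sortedChunks.get(i).iterator();
            iterators.add(it);
            if (it.hasNext()) {
                heap.add(new HeapEntry(it.next(), i));
            }
        }

        // Repeatedly pull the smallest element, then refill from the chunk it came from
        List<Integer> merged = new ArrayList<>();
        while (!heap.isEmpty()) {
            HeapEntry least = heap.poll();
            merged.add(least.value());
            Iterator<Integer> source = iterators.get(least.sourceIndex());
            if (source.hasNext()) {
                heap.add(new HeapEntry(source.next(), least.sourceIndex()));
            }
        }
        return merged;
    }

    public static void main(String[] args) {
        System.out.println(merge(List.of(List.of(1, 4, 9), List.of(2, 3, 8), List.of(5, 6, 7))));
        // [1, 2, 3, 4, 5, 6, 7, 8, 9]
    }
}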