List of usage examples for the java.util.PriorityQueue constructor
public PriorityQueue(int initialCapacity, Comparator<? super E> comparator)
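Before the examples taken from real projects, here is a minimal, self-contained sketch (not from any of the source files below) of what this constructor does: the first argument sizes the backing array, the second supplies the ordering used by poll() and peek().

import java.util.Comparator;
import java.util.PriorityQueue;

public class ShortestStringFirst {
    public static void main(String[] args) {
        // Initial capacity of 10; ordering by string length, shortest first.
        PriorityQueue<String> queue = new PriorityQueue<>(10, Comparator.comparingInt(String::length));
        queue.add("banana");
        queue.add("fig");
        queue.add("apple");
        // poll() returns elements in comparator order: fig, apple, banana
        while (!queue.isEmpty()) {
            System.out.println(queue.poll());
        }
    }
}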
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java
public List<KeyValuePair<Long, Double>> getMostSimilarItems(long item, int count) {
    Comparator<KeyValuePair<Long, Double>> similarityComparator = new Comparator<KeyValuePair<Long, Double>>() {
        @Override
        public int compare(KeyValuePair<Long, Double> arg0, KeyValuePair<Long, Double> arg1) {
            double difference = arg0.getValue().doubleValue() - arg1.getValue().doubleValue();
            return (int) (100000 * difference);
        }
    };

    PriorityQueue<KeyValuePair<Long, Double>> queue = new PriorityQueue<KeyValuePair<Long, Double>>(count,
            similarityComparator);
    LinkedList<KeyValuePair<Long, Double>> results = new LinkedList<KeyValuePair<Long, Double>>();

    for (Long candidateItem : m_modelItemFactorizedValues.keySet()) {
        double similarity = calculateItemSimilarity(item, candidateItem);
        KeyValuePair<Long, Double> targetItem = new KeyValuePair<Long, Double>(candidateItem, similarity);
        queue.add(targetItem);
    }

    results.addAll(queue);
    return results;
}
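Two details of this snippet are worth flagging: scaling the double difference and casting to int can collapse small differences to zero, and results.addAll(queue) copies the heap's internal array order rather than ascending similarity, while count is only used as the initial capacity. A hedged sketch of a variant that drains the queue instead, reusing the KeyValuePair accessors assumed from the snippet above:

// Sketch only: KeyValuePair and the candidate loop are assumed from the example above.
Comparator<KeyValuePair<Long, Double>> bySimilarity =
        Comparator.comparingDouble(pair -> pair.getValue().doubleValue());

PriorityQueue<KeyValuePair<Long, Double>> queue = new PriorityQueue<>(count, bySimilarity);
// ... add candidate items to the queue as in the original method ...

// Drain the queue so the result really is in ascending similarity order.
List<KeyValuePair<Long, Double>> results = new ArrayList<>();
while (!queue.isEmpty()) {
    results.add(queue.poll());
}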
From source file:edu.usc.ir.geo.gazetteer.GeoNameResolver.java
/**
 * Select the best match for each location name extracted from a document,
 * choosing from among a list of lists of candidate matches. Filter uses the
 * following features: 1) edit distance between name and the resolved name,
 * choose smallest one 2) content (haven't implemented)
 *
 * @param resolvedEntities
 *            final result for the input stream
 * @param allCandidates
 *            each location name may hit several documents; this is the
 *            collection of all hit documents
 * @param count
 *            number of results for one location
 * @throws IOException
 * @throws RuntimeException
 */
private void pickBestCandidates(HashMap<String, List<Location>> resolvedEntities,
        HashMap<String, List<Location>> allCandidates, int count) {

    for (String extractedName : allCandidates.keySet()) {

        List<Location> cur = allCandidates.get(extractedName);
        if (cur.isEmpty())
            continue; // continue if no results found

        int maxWeight = Integer.MIN_VALUE;
        // In case weight is equal for all, return top element
        int bestIndex = 0;

        // Priority queue to return top elements
        PriorityQueue<Location> pq = new PriorityQueue<>(cur.size(), new Comparator<Location>() {
            @Override
            public int compare(Location o1, Location o2) {
                return Integer.compare(o2.getWeight(), o1.getWeight());
            }
        });

        for (int i = 0; i < cur.size(); ++i) {
            int weight = 0;
            // get cur's ith resolved entry's name
            String resolvedName = String.format(" %s ", cur.get(i).getName());

            if (resolvedName.contains(String.format(" %s ", extractedName))) {
                // Assign a weight as per configuration if extracted name is found as an exact word in name
                weight = WEIGHT_NAME_MATCH;
            } else if (resolvedName.contains(extractedName)) {
                // Assign a weight as per configuration if extracted name is found partly in name
                weight = WEIGHT_NAME_PART_MATCH;
            }

            // get all alternate names of cur's ith resolved entry
            String[] altNames = cur.get(i).getAlternateNames().split(",");
            float altEditDist = 0;
            for (String altName : altNames) {
                if (altName.contains(extractedName)) {
                    altEditDist += StringUtils.getLevenshteinDistance(extractedName, altName);
                }
            }

            // the smaller the edit distance, the greater the weight should be
            weight += getCalibratedWeight(altNames.length, altEditDist);

            // Give preference to sorted results. The 0th result should have more priority
            weight += (cur.size() - i) * WEIGHT_SORT_ORDER;

            cur.get(i).setWeight(weight);

            if (weight > maxWeight) {
                maxWeight = weight;
                bestIndex = i;
            }

            pq.add(cur.get(i));
        }

        if (bestIndex == -1)
            continue;

        List<Location> resultList = new ArrayList<>();
        for (int i = 0; i < count && !pq.isEmpty(); i++) {
            resultList.add(pq.poll());
        }

        resolvedEntities.put(extractedName, resultList);
    }
}
From source file:com.addthis.hydra.data.io.DiskBackedList2.java
/**
 * Sort the collection of elements using a standard external sort algorithm: sort each chunk of elements, then
 * merge the chunks into a new list, then switch to the new list.
 */
public void sort(final Comparator<? super K> comp) {
    try {
        // Sort each chunk. Done if there is only one chunk.
        sortEachChunk(comp);
        if (chunks.size() <= 1) {
            return;
        }

        Comparator<Pair<K, Integer>> pairComp = new Comparator<Pair<K, Integer>>() {
            @Override
            public int compare(Pair<K, Integer> e1, Pair<K, Integer> e2) {
                return comp.compare(e1.getLeft(), e2.getLeft());
            }
        };

        // This heap stores the lowest remaining value from each chunk
        PriorityQueue<Pair<K, Integer>> heap = new PriorityQueue<>(chunks.size(), pairComp);
        ArrayList<Iterator> iterators = new ArrayList<>(chunks.size());

        // Initialize the heap with one value per chunk
        close();
        for (int i = 0; i < chunks.size(); i++) {
            Iterator<K> it = chunks.get(i).getChunkIterator();
            iterators.add(i, it);
            if (it.hasNext()) {
                K elt = it.next();
                if (elt != null) {
                    heap.add(Pair.of(elt, i));
                }
            }
        }

        // Make a new disk backed list to store sorted values.
        // When the number of chunks is large, the size of the output buffer needs to shrink to make up for the extra mem usage
        long storageMaxChunkSize = maxChunkSizeBytes / (1 + chunks.size() / 20);
        DiskBackedList2<K> storage = new DiskBackedList2<>(codec, storageMaxChunkSize, directory);

        // Repeatedly pull the smallest element from the heap
        while (!heap.isEmpty()) {
            Pair<K, Integer> leastElt = heap.poll();
            storage.add(leastElt.getLeft());
            @SuppressWarnings({ "unchecked" })
            Iterator<K> polledIterator = iterators.get(leastElt.getRight());
            if (polledIterator.hasNext()) {
                heap.add(Pair.of(polledIterator.next(), leastElt.getRight()));
            }
        }

        // Switch to the storage dbl's chunks
        storage.close();
        chunks = storage.getChunks();
        currentChunk = null;
    } catch (IOException io) {
        throw Throwables.propagate(io);
    }
}
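The core of this external sort is a k-way merge: keep one (value, source index) pair per sorted source in the queue and repeatedly poll the smallest. A self-contained sketch of just that pattern over in-memory lists (the names here are illustrative and not part of DiskBackedList2):

import java.util.*;

public class KWayMerge {
    /** Merges already-sorted lists into one sorted list using a PriorityQueue. */
    public static <T extends Comparable<T>> List<T> merge(List<List<T>> sortedLists) {
        // Each heap entry holds the next unread value from one list plus that list's index.
        PriorityQueue<Map.Entry<T, Integer>> heap = new PriorityQueue<>(
                Math.max(1, sortedLists.size()), (a, b) -> a.getKey().compareTo(b.getKey()));
        int[] positions = new int[sortedLists.size()];
        for (int i = 0; i < sortedLists.size(); i++) {
            if (!sortedLists.get(i).isEmpty()) {
                heap.add(new AbstractMap.SimpleEntry<>(sortedLists.get(i).get(0), i));
                positions[i] = 1;
            }
        }
        List<T> result = new ArrayList<>();
        while (!heap.isEmpty()) {
            Map.Entry<T, Integer> least = heap.poll();
            result.add(least.getKey());
            int src = least.getValue();
            // Refill the heap from the list that just supplied the smallest element.
            if (positions[src] < sortedLists.get(src).size()) {
                heap.add(new AbstractMap.SimpleEntry<>(sortedLists.get(src).get(positions[src]++), src));
            }
        }
        return result;
    }
}

For example, merge(Arrays.asList(Arrays.asList(1, 3, 5), Arrays.asList(2, 4))) yields [1, 2, 3, 4, 5].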
From source file:amfservices.actions.PGServicesAction.java
public Map<String, Object> spawnEggAction(String uid, String coteID, List<String> penguinIDs, long now)
        throws PGException {
    User user = User.getUser(uid);

    PGException.Assert(user.cotes().contains(coteID), PGError.INVALID_COTE, "Invalid cote");

    final Cote cote = Cote.getCote(uid, coteID);

    for (String pengId : penguinIDs) {
        PGException.Assert(cote.penguins().contains(pengId), PGError.PENGUIN_NOT_IN_COTE,
                "Penguin isn't contained in cote");
    }

    PriorityQueue<Penguin> penguins = new PriorityQueue(Math.max(penguinIDs.size(), 1),
            new Comparator<Penguin>() {
                @Override
                public int compare(Penguin p1, Penguin p2) {
                    long p1NextSpawnTime = PenguinServices.inst().nextSpawn(p1, cote);
                    long p2NextSpawnTime = PenguinServices.inst().nextSpawn(p2, cote);

                    return (p1NextSpawnTime > p2NextSpawnTime) ? 1
                            : ((p1NextSpawnTime == p2NextSpawnTime) ? 0 : -1);
                }
            });

    Map<String, Object> failData = new HashMap();

    // init penguin entities
    for (String pengId : penguinIDs) {
        Penguin penguin = Penguin.getPenguin(uid, coteID, pengId);
        long nextSpawn = PenguinServices.inst().nextSpawn(penguin, cote);

        if (nextSpawn > now) {
            Map<String, Object> failPenguinData = new HashMap(2);

            failPenguinData.put(PGMacro.TIME_LAST_SPAWN, penguin.getLastSpawn());
            failPenguinData.put(PGMacro.EGG_STORE, penguin.getLastEggStorage().getValue());

            failData.put(pengId, failPenguinData);
        } else {
            penguins.add(penguin);
        }
    }

    Map<String, Object> successData = new HashMap();
    List<String> limitedEggPenguins = new LinkedList();

    // need for add egg
    BoxEgg boxEgg = BoxEgg.getBoxEgg(uid, coteID);
    Dog dog = Dog.getDog(uid, coteID);

    while (!penguins.isEmpty()) {
        Penguin penguin = penguins.poll();
        long nextSpawn = PenguinServices.inst().nextSpawn(penguin, cote);

        String spawnedEggKind = PenguinServices.inst().spawnEgg(penguin, nextSpawn);
        EggStoreServices.EggStorage eggStorage = EggStoreServices.inst().addEgg(cote, boxEgg, dog,
                spawnedEggKind, now);

        if (eggStorage == EggStoreServices.EggStorage.LIMITED) {
            limitedEggPenguins.add(penguin.getPenguinID());
        }

        penguin.setLastEggStorage(eggStorage);
        penguin.saveToDB();

        Map<String, Object> penguinResp = new HashMap();

        penguinResp.put(PGMacro.KIND, spawnedEggKind);
        penguinResp.put(PGMacro.EGG_STORE, eggStorage.getValue());

        successData.put(penguin.getPenguinID(), penguinResp);
    }

    Map<String, Object> response = new HashMap();

    response.put(PGMacro.SUCCESS, successData);
    response.put(PGMacro.FAIL, failData);
    response.put(PGMacro.SPAWN_LIMITED_PENGUINS, AMFBuilder.toAMF(limitedEggPenguins));

    return response;
}
From source file:com.linkedin.pinot.routing.builder.GeneratorBasedRoutingTableBuilder.java
@Override
public List<ServerToSegmentSetMap> computeRoutingTableFromExternalView(String tableName,
        ExternalView externalView, List<InstanceConfig> instanceConfigList) {

    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a replica with fewer segments.
    //
    // Then, we pick a random replica for each segment, iterating from fewest replicas to most replicas, inversely
    // weighted by the number of segments already assigned to that replica. This ensures that we build a routing table
    // that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //    a) Pick TARGET_SERVER_COUNT_PER_QUERY distinct servers
    //    b) Check if the server set covers all the segments; if not, add additional servers until it does.
    //    c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //    d) For each segment, pick a random replica with proper weighting
    //    e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.
    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigList);

    PriorityQueue<Pair<Map<String, Set<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, Set<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, Set<String>>, Float> left,
                        Pair<Map<String, Set<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap, so we need to return the
                    // negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });

    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }

    // Generate more routing tables and keep the ROUTING_TABLE_COUNT top ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, Set<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(
                routingTableGenerator);
        Pair<Map<String, Set<String>>, Float> worstRoutingTable = topRoutingTables.peek();

        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }

    // Return the best routing tables
    List<ServerToSegmentSetMap> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        Pair<Map<String, Set<String>>, Float> routingTableWithMetric = topRoutingTables.poll();
        routingTables.add(new ServerToSegmentSetMap(routingTableWithMetric.getKey()));
    }

    return routingTables;
}
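The peek/poll/add loop above is the general "keep the best N of a stream" pattern: the queue is a max-heap on the metric, so its head is the worst table currently kept, and each new candidate only has to beat that one element. A generic, hedged sketch of the same pattern (the names are illustrative, not Pinot's):

import java.util.*;

public class TopN {
    /** Keeps the n items with the smallest cost out of a stream of (item, cost) candidates; n must be >= 1. */
    public static <T> List<T> bestN(Iterator<Map.Entry<T, Float>> candidates, int n) {
        // Max-heap on cost: the head is the worst of the currently kept candidates.
        PriorityQueue<Map.Entry<T, Float>> kept = new PriorityQueue<>(n,
                (a, b) -> -Float.compare(a.getValue(), b.getValue()));
        while (candidates.hasNext()) {
            Map.Entry<T, Float> candidate = candidates.next();
            if (kept.size() < n) {
                kept.add(candidate);
            } else if (candidate.getValue() < kept.peek().getValue()) {
                kept.poll();          // drop the current worst
                kept.add(candidate);  // keep the better candidate
            }
        }
        List<T> result = new ArrayList<>(kept.size());
        while (!kept.isEmpty()) {
            result.add(kept.poll().getKey());
        }
        return result;
    }
}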
From source file:net.bluehornreader.service.FeedCrawlerService.java
/**
 * If there's any change, it deletes all previous feeds. Would be nicer to keep what already exists, but not sure it's worth it.
 *
 * @throws Exception
 */
private void updateFeedList() throws Exception {
    int feedIdsSeq = crawlerDb.getFeedIdsSeq(crawler.crawlerId);
    if (crawler.feedIdsSeq == feedIdsSeq) {
        return;
    }

    LOG.info("Feed list changed");
    HashMap<String, FeedInfo> newFeedMap = new HashMap<>();
    Crawler newCrawler = crawlerDb.getCrawler(crawler.crawlerId);

    synchronized (this) {
        // Some feeds might be being crawled at this time; we don't want to end up with 2 entries for them in
        // availableFeeds, so we don't add them
        HashSet<String> crawlingFeedIds = new HashSet<>(feedMap.keySet());
        {
            HashSet<String> availableFeedIds = new HashSet<>();
            for (FeedInfo feedInfo : availableFeeds) {
                availableFeedIds.add(feedInfo.feed.feedId);
            }
            crawlingFeedIds.removeAll(availableFeedIds);
        }

        availableFeeds = new PriorityQueue<>(newFeedMap.size() + 1, feedInfoComparator);

        for (String feedId : newCrawler.feedIds) {
            Feed feed = feedDb.get(feedId);
            if (feed == null) {
                LOG.warn(String.format(
                        "FeedCrawlerService %s was asked to crawl feed %s but couldn't find such a feed", IP,
                        feedId));
            } else {
                FeedInfo feedInfo = feedMap.get(feedId);
                if (feedInfo == null) {
                    feedInfo = new FeedInfo(feed, getSeq());
                    LOG.info("New feed to crawl: " + feedInfo);
                }
                newFeedMap.put(feedId, feedInfo);

                if (crawlingFeedIds.contains(feedId)) {
                    LOG.info(String.format(
                            "Feed %s is being currently crawled, so it's not going to be added to the list with available feeds",
                            feedInfo));
                } else {
                    availableFeeds.add(feedInfo);
                }
            }
        }

        feedMap = newFeedMap;
        crawler.feedIdsSeq = feedIdsSeq;
        LOG.info("Feeds to crawl: " + feedMap);
    }
}
From source file:com.linkedin.pinot.broker.routing.builder.GeneratorBasedRoutingTableBuilder.java
@Override
public void computeRoutingTableFromExternalView(String tableName, ExternalView externalView,
        List<InstanceConfig> instanceConfigs) {

    // The default routing table algorithm tries to balance all available segments across all servers, so that each
    // server is hit on every query. This works fine with small clusters (say less than 20 servers) but for larger
    // clusters, this adds up to significant overhead (one request must be enqueued for each server, processed,
    // returned, deserialized, aggregated, etc.).
    //
    // For large clusters, we want to avoid hitting every server, as this also has an adverse effect on client tail
    // latency. This is due to the fact that a query cannot return until it has received a response from each server,
    // and the greater the number of servers that are hit, the more likely it is that one of the servers will be a
    // straggler (eg. due to contention for query processing threads, GC, etc.). We also want to balance the segments
    // within any given routing table so that each server in the routing table has approximately the same number of
    // segments to process.
    //
    // To do so, we have a routing table generator that generates routing tables by picking a random subset of servers.
    // With this set of servers, we check if the set of segments served by these servers is complete. If the set of
    // segments served does not cover all of the segments, we compute the list of missing segments and pick a random
    // server that serves these missing segments until we have complete coverage of all the segments.
    //
    // We then order the segments in ascending number of replicas within our server set, in order to allocate the
    // segments with fewer replicas first. This ensures that segments that are 'easier' to allocate are more likely to
    // end up on a server with fewer segments.
    //
    // Then, we pick a server with least segments already assigned for each segment. This ensures that we build a
    // routing table that's as even as possible.
    //
    // The algorithm to generate a routing table is thus:
    // 1. Compute the inverse external view, a mapping of servers to segments
    // 2. For each routing table to generate:
    //    a) Pick _targetNumServersPerQuery distinct servers
    //    b) Check if the server set covers all the segments; if not, add additional servers until it does
    //    c) Order the segments in our server set in ascending order of number of replicas present in our server set
    //    d) For each segment, pick a server with least segments already assigned
    //    e) Return that routing table
    //
    // Given that we can generate routing tables at will, we then generate many routing tables and use them to optimize
    // according to two criteria: the variance in workload per server for any individual table as well as the variance
    // in workload per server across all the routing tables. To do so, we generate an initial set of routing tables
    // according to a per-routing table metric and discard the worst routing tables.
    RoutingTableGenerator routingTableGenerator = buildRoutingTableGenerator();
    routingTableGenerator.init(externalView, instanceConfigs);

    PriorityQueue<Pair<Map<String, List<String>>, Float>> topRoutingTables = new PriorityQueue<>(
            ROUTING_TABLE_COUNT, new Comparator<Pair<Map<String, List<String>>, Float>>() {
                @Override
                public int compare(Pair<Map<String, List<String>>, Float> left,
                        Pair<Map<String, List<String>>, Float> right) {
                    // Float.compare sorts in ascending order and we want a max heap, so we need to return the
                    // negative of the comparison
                    return -Float.compare(left.getValue(), right.getValue());
                }
            });

    for (int i = 0; i < ROUTING_TABLE_COUNT; i++) {
        topRoutingTables.add(generateRoutingTableWithMetric(routingTableGenerator));
    }

    // Generate more routing tables and keep the ROUTING_TABLE_COUNT top ones
    for (int i = 0; i < (ROUTING_TABLE_GENERATION_COUNT - ROUTING_TABLE_COUNT); ++i) {
        Pair<Map<String, List<String>>, Float> newRoutingTable = generateRoutingTableWithMetric(
                routingTableGenerator);
        Pair<Map<String, List<String>>, Float> worstRoutingTable = topRoutingTables.peek();

        // If the new routing table is better than the worst one, keep it
        if (newRoutingTable.getRight() < worstRoutingTable.getRight()) {
            topRoutingTables.poll();
            topRoutingTables.add(newRoutingTable);
        }
    }

    // Return the best routing tables
    List<Map<String, List<String>>> routingTables = new ArrayList<>(topRoutingTables.size());
    while (!topRoutingTables.isEmpty()) {
        routingTables.add(topRoutingTables.poll().getKey());
    }

    setRoutingTables(routingTables);
}
From source file:main.java.RMDupper.java
/**
 * This method reads a SAM file and parses the input.
 * Currently, only merged reads with the "M" flag in front are checked for duplicates.
 * R/F flags are simply written into the output file, as are other "non-flagged" ones.
 */
public void readSAMFile() {
    Comparator<SAMRecord> samRecordComparatorForRecordBuffer = new SAMRecordPositionAndQualityComparator();
    Comparator<SAMRecord> samRecordComparatorForDuplicateBuffer;

    if (this.allReadsAsMerged) {
        samRecordComparatorForDuplicateBuffer = new SAMRecordQualityComparator();
    } else {
        samRecordComparatorForDuplicateBuffer = new SAMRecordQualityComparatorPreferMerged();
    }

    PriorityQueue<ImmutableTriple<Integer, Integer, SAMRecord>> recordBuffer =
            new PriorityQueue<ImmutableTriple<Integer, Integer, SAMRecord>>(1000,
                    Comparator.comparing(ImmutableTriple<Integer, Integer, SAMRecord>::getRight,
                            samRecordComparatorForRecordBuffer));
    PriorityQueue<ImmutableTriple<Integer, Integer, SAMRecord>> duplicateBuffer =
            new PriorityQueue<ImmutableTriple<Integer, Integer, SAMRecord>>(1000,
                    Comparator.comparing(ImmutableTriple<Integer, Integer, SAMRecord>::getRight,
                            samRecordComparatorForDuplicateBuffer.reversed()));
    Set<String> discardSet = new HashSet<String>(1000);
    String referenceName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;

    Iterator it = inputSam.iterator();
    while (it.hasNext()) {
        SAMRecord curr = (SAMRecord) it.next();
        if (curr.getReferenceName() == SAMRecord.NO_ALIGNMENT_REFERENCE_NAME) {
            this.outputSam.addAlignment(curr);
        } else {
            if (referenceName == curr.getReferenceName()) {
                queueOrOutput(this.dupStats, this.oc, this.outputSam, this.allReadsAsMerged, recordBuffer,
                        duplicateBuffer, discardSet, curr);
            } else {
                flushQueue(this.dupStats, this.oc, this.outputSam, this.allReadsAsMerged, recordBuffer,
                        duplicateBuffer, discardSet);
                queueOrOutput(this.dupStats, this.oc, this.outputSam, this.allReadsAsMerged, recordBuffer,
                        duplicateBuffer, discardSet, curr);
                referenceName = curr.getReferenceName();
            }
        }

        this.dupStats.total++;
        if (this.dupStats.total % 100000 == 0) {
            if (!piped) {
                System.err.println("Reads treated: " + this.dupStats.total);
            }
        }
    }
    flushQueue(this.dupStats, this.oc, this.outputSam, this.allReadsAsMerged, recordBuffer, duplicateBuffer,
            discardSet);
}
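Both buffers above are built with Comparator.comparing(keyExtractor, keyComparator), which lifts a comparator for the element type (SAMRecord) to a comparator for the wrapper type (ImmutableTriple), optionally reversed. A small sketch of the same idiom with ordinary JDK types (the example values are made up):

import java.util.*;

public class KeyedComparatorDemo {
    public static void main(String[] args) {
        // Key comparator: compare strings by length.
        Comparator<String> byLength = Comparator.comparingInt(String::length);

        // Adapt it to Map.Entry<Integer, String> by extracting the value, as the example above
        // does with ImmutableTriple::getRight.
        Comparator<Map.Entry<Integer, String>> byValueLength =
                Comparator.comparing(Map.Entry::getValue, byLength);

        PriorityQueue<Map.Entry<Integer, String>> queue = new PriorityQueue<>(16, byValueLength);
        queue.add(new AbstractMap.SimpleEntry<>(1, "banana"));
        queue.add(new AbstractMap.SimpleEntry<>(2, "fig"));

        System.out.println(queue.poll().getValue()); // prints "fig" (shortest value first)

        // byValueLength.reversed() would give the opposite order, as used for the duplicate buffer.
    }
}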
From source file:org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.FifoIntraQueuePreemptionPlugin.java
private PriorityQueue<TempAppPerPartition> createTempAppForResCalculation(String partition,
        Collection<FiCaSchedulerApp> apps, TAPriorityComparator taComparator) {
    PriorityQueue<TempAppPerPartition> orderedByPriority = new PriorityQueue<>(100, taComparator);

    // have an internal temp app structure to store intermediate data (priority)
    for (FiCaSchedulerApp app : apps) {
        Resource used = app.getAppAttemptResourceUsage().getUsed(partition);
        Resource amUsed = null;
        if (!app.isWaitingForAMContainer()) {
            amUsed = app.getAMResource(partition);
        }
        Resource pending = app.getTotalPendingRequestsPerPartition().get(partition);
        Resource reserved = app.getAppAttemptResourceUsage().getReserved(partition);

        used = (used == null) ? Resources.createResource(0, 0) : used;
        amUsed = (amUsed == null) ? Resources.createResource(0, 0) : amUsed;
        pending = (pending == null) ? Resources.createResource(0, 0) : pending;
        reserved = (reserved == null) ? Resources.createResource(0, 0) : reserved;

        HashSet<String> partitions = new HashSet<String>(
                app.getAppAttemptResourceUsage().getNodePartitionsSet());
        partitions.addAll(app.getTotalPendingRequestsPerPartition().keySet());

        // Create TempAppPerQueue for further calculation.
        TempAppPerPartition tmpApp = new TempAppPerPartition(app, Resources.clone(used),
                Resources.clone(amUsed), Resources.clone(reserved), Resources.clone(pending));

        // Set ideal allocation of app as 0.
        tmpApp.idealAssigned = Resources.createResource(0, 0);

        orderedByPriority.add(tmpApp);
    }
    return orderedByPriority;
}
From source file:org.apache.hadoop.hdfs.server.namenode.TestFileJournalManager.java
private static EditLogInputStream getJournalInputStream(JournalManager jm, long txId, boolean inProgressOk)
        throws IOException {

    final PriorityQueue<EditLogInputStream> allStreams = new PriorityQueue<EditLogInputStream>(64,
            JournalSet.EDIT_LOG_INPUT_STREAM_COMPARATOR);
    jm.selectInputStreams(allStreams, txId, inProgressOk);
    EditLogInputStream elis = null, ret;
    try {
        while ((elis = allStreams.poll()) != null) {
            if (elis.getFirstTxId() > txId) {
                break;
            }
            if (elis.getLastTxId() < txId) {
                elis.close();
                continue;
            }
            elis.skipUntil(txId);
            ret = elis;
            elis = null;
            return ret;
        }
    } finally {
        IOUtils.cleanup(LOG, allStreams.toArray(new EditLogInputStream[0]));
        IOUtils.cleanup(LOG, elis);
    }
    return null;
}