List of usage examples for the java.util.PriorityQueue constructor
public PriorityQueue(int initialCapacity, Comparator<? super E> comparator)
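Before the full examples below, here is a minimal, self-contained sketch of the constructor itself: the queue is given an initial capacity (a sizing hint only) and a Comparator that defines its ordering. The class name PriorityQueueConstructorExample and the byLength comparator are illustrative and do not come from any of the source files listed on this page.

import java.util.Comparator;
import java.util.PriorityQueue;

public class PriorityQueueConstructorExample {
    public static void main(String[] args) {
        // Hypothetical ordering: shortest string first.
        Comparator<String> byLength = Comparator.comparingInt(String::length);

        // Initial capacity 16 is only a hint; the queue grows as needed.
        PriorityQueue<String> queue = new PriorityQueue<>(16, byLength);

        queue.offer("banana");
        queue.offer("fig");
        queue.offer("apple");

        // poll() returns elements in comparator order: fig, apple, banana
        while (!queue.isEmpty()) {
            System.out.println(queue.poll());
        }
    }
}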
From source file:org.apache.drill.exec.store.mongo.MongoGroupScan.java
@Override
public void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException {
    logger.debug("Incoming endpoints :" + endpoints);
    watch.reset();
    watch.start();
    final int numSlots = endpoints.size();
    int totalAssignmentsTobeDone = chunksMapping.size();
    Preconditions.checkArgument(numSlots <= totalAssignmentsTobeDone,
            String.format("Incoming endpoints %d is greater than number of chunks %d", numSlots,
                    totalAssignmentsTobeDone));
    final int minPerEndpointSlot = (int) Math.floor((double) totalAssignmentsTobeDone / numSlots);
    final int maxPerEndpointSlot = (int) Math.ceil((double) totalAssignmentsTobeDone / numSlots);

    endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
    Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
    for (int i = 0; i < numSlots; ++i) {
        endpointFragmentMapping.put(i, new ArrayList<MongoSubScanSpec>(maxPerEndpointSlot));
        String hostname = endpoints.get(i).getAddress();
        Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
        if (hostIndexQueue == null) {
            hostIndexQueue = Lists.newLinkedList();
            endpointHostIndexListMap.put(hostname, hostIndexQueue);
        }
        hostIndexQueue.add(i);
    }

    Set<Entry<String, List<ChunkInfo>>> chunksToAssignSet = Sets.newHashSet(chunksInverseMapping.entrySet());
    for (Iterator<Entry<String, List<ChunkInfo>>> chunksIterator = chunksToAssignSet.iterator();
            chunksIterator.hasNext();) {
        Entry<String, List<ChunkInfo>> chunkEntry = chunksIterator.next();
        Queue<Integer> slots = endpointHostIndexListMap.get(chunkEntry.getKey());
        if (slots != null) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                Integer slotIndex = slots.poll();
                List<MongoSubScanSpec> subScanSpecList = endpointFragmentMapping.get(slotIndex);
                subScanSpecList.add(buildSubScanSpecAndGet(chunkInfo));
                slots.offer(slotIndex);
            }
            chunksIterator.remove();
        }
    }

    // Min-heap and max-heap of per-endpoint assignment lists, ordered by list size, used to balance the load.
    PriorityQueue<List<MongoSubScanSpec>> minHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots,
            LIST_SIZE_COMPARATOR);
    PriorityQueue<List<MongoSubScanSpec>> maxHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots,
            LIST_SIZE_COMPARATOR_REV);
    for (List<MongoSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
        if (listOfScan.size() < minPerEndpointSlot) {
            minHeap.offer(listOfScan);
        } else if (listOfScan.size() > minPerEndpointSlot) {
            maxHeap.offer(listOfScan);
        }
    }

    // Assign chunks that have no matching endpoint host to the least-loaded endpoints.
    if (chunksToAssignSet.size() > 0) {
        for (Entry<String, List<ChunkInfo>> chunkEntry : chunksToAssignSet) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                List<MongoSubScanSpec> smallestList = minHeap.poll();
                smallestList.add(buildSubScanSpecAndGet(chunkInfo));
                minHeap.offer(smallestList);
            }
        }
    }

    // Rebalance: move one assignment at a time from the most-loaded lists to under-filled lists.
    while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
        List<MongoSubScanSpec> smallestList = minHeap.poll();
        List<MongoSubScanSpec> largestList = maxHeap.poll();
        smallestList.add(largestList.remove(largestList.size() - 1));
        if (largestList.size() > minPerEndpointSlot) {
            maxHeap.offer(largestList);
        }
        if (smallestList.size() < minPerEndpointSlot) {
            minHeap.offer(smallestList);
        }
    }

    logger.debug("Built assignment map in {} s.\nEndpoints: {}.\nAssignment Map: {}",
            watch.elapsed(TimeUnit.NANOSECONDS) / 1000, endpoints, endpointFragmentMapping.toString());
}
From source file:org.mskcc.cbio.portal.servlet.NetworkServlet.java
/**
 * @param network
 * @param diffusion
 * @param n
 * @return
 */
private List<Node> getNodesToRemove(final Network network, final double diffusion, final int n) {
    final Map<Node, Double> mapDiffusion = getMapDiffusedTotalAlteredPercentage(network, diffusion);

    // keep track of the top n
    PriorityQueue<Node> topAlteredNodes = new PriorityQueue<Node>(n, new Comparator<Node>() {
        public int compare(Node n1, Node n2) {
            int ret = mapDiffusion.get(n1).compareTo(mapDiffusion.get(n2));
            if (diffusion != 0 && ret == 0) { // if the same diffused perc, use own perc
                ret = Double.compare(getTotalAlteredPercentage(n1), getTotalAlteredPercentage(n2));
            }
            if (ret == 0) { // if the same, rank according to degree
                ret = network.getDegree(n1) - network.getDegree(n2);
            }
            return ret;
        }
    });

    List<Node> nodesToRemove = new ArrayList<Node>();
    for (Node node : network.getNodes()) {
        if (isInQuery(node) || node.getType().equals(NodeType.DRUG)) {
            continue;
        }
        if (topAlteredNodes.size() < n) {
            topAlteredNodes.add(node);
        } else {
            if (n == 0) {
                nodesToRemove.add(node);
            } else {
                if (mapDiffusion.get(node) > mapDiffusion.get(topAlteredNodes.peek())) {
                    nodesToRemove.add(topAlteredNodes.poll());
                    topAlteredNodes.add(node);
                } else {
                    nodesToRemove.add(node);
                }
            }
        }
    }
    return nodesToRemove;
}
From source file:de.tudarmstadt.lt.n2n.annotators.RelationAnnotator.java
protected List<Dependency> find_path_dijkstra(Token start, Token dest, Collection<Token> nodes,
        Map<Token, List<Dependency>> edges) throws IllegalStateException {
    List<Dependency> shortest_path = new ArrayList<Dependency>();

    final Map<Token, Integer> dist = new HashMap<Token, Integer>();
    final Map<Token, Dependency> prev = new HashMap<Token, Dependency>();
    for (Token t : nodes)
        dist.put(t, Integer.MAX_VALUE);
    dist.put(start, 0);

    PriorityQueue<Token> Q = new PriorityQueue<Token>(edges.size(), new Comparator<Token>() {
        @Override
        public int compare(Token o1, Token o2) {
            return dist.get(o1).compareTo(dist.get(o2));
        }
    });
    Q.addAll(nodes);

    while (!Q.isEmpty()) {
        Token u = Q.poll(); // initially source node
        if (u.equals(dest)) // stop if dest
            break;
        if (dist.get(u) == Integer.MAX_VALUE)
            throw new IllegalStateException(String.format(
                    "Could not find path from token '%s' to token '%s'. Perhaps start or dest is part of a preposition? (%s)",
                    start.getCoveredText(), dest.getCoveredText(),
                    DocumentMetaData.get(u.getCAS()).getDocumentId()));
        List<Dependency> connected_edges = edges.get(u);
        if (connected_edges == null)
            continue;
        for (Dependency d : connected_edges) {
            Token v = null;
            if (u.equals(d.getGovernor()))
                v = d.getDependent();
            else
                v = d.getGovernor();
            if (!Q.contains(v))
                continue;
            int alt = dist.get(u) + 1; // dist(u,v) = 1
            if (alt < dist.get(v)) {
                dist.put(v, alt);
                prev.put(v, d);
                Q.remove(v); // reinsert v so that Q is recomputed
                Q.offer(v);
            }
        }
    }

    Token u = dest;
    Dependency e = prev.get(u);
    while (e != null) {
        shortest_path.add(0, e);
        if (u == e.getGovernor())
            u = e.getDependent();
        else
            u = e.getGovernor();
        e = prev.get(u);
    }
    return shortest_path;
}
From source file:org.apache.hama.ml.recommendation.cf.OnlineCF.java
@Override
public List<Preference<Long, Long>> getMostPreferredItems(long userId, int count) {
    Comparator<Preference<Long, Long>> scoreComparator = new Comparator<Preference<Long, Long>>() {
        @Override
        public int compare(Preference<Long, Long> arg0, Preference<Long, Long> arg1) {
            double difference = arg0.getValue().get() - arg1.getValue().get();
            return (int) (100000 * difference);
        }
    };
    PriorityQueue<Preference<Long, Long>> queue = new PriorityQueue<Preference<Long, Long>>(count,
            scoreComparator);
    LinkedList<Preference<Long, Long>> results = new LinkedList<Preference<Long, Long>>();

    if (function == null) {
        Class<?> cls = conf.getClass(OnlineCF.Settings.CONF_ONLINE_UPDATE_FUNCTION, null);
        try {
            function = (OnlineUpdate.Function) (cls.newInstance());
        } catch (Exception e) {
            // set default function
        }
    }

    InputStructure e = new InputStructure();
    e.user = this.modelUserFactorizedValues.get(Long.valueOf(userId));
    e.userFeatureFactorized = this.modelUserFeatureFactorizedValues;
    e.userFeatures = this.modelUserFeatures.get(Long.valueOf(userId));
    e.itemFeatureFactorized = this.modelItemFeatureFactorizedValues;
    if (e.user == null) {
        return null;
    }

    double score = 0.0;
    for (Entry<Long, VectorWritable> item : modelItemFactorizedValues.entrySet()) {
        e.item = item.getValue();
        e.itemFeatures = this.modelItemFeatures.get(item.getKey());
        score = function.predict(e);
        queue.add(new Preference<Long, Long>(userId, item.getKey(), score));
    }

    results.addAll(queue);
    return results;
}
From source file:com.datatorrent.contrib.hdht.HDHTWalManager.java
/**
 * Copy content from parent WAL files to new location ordered by WindowID.
 * @param parentWals
 * @param walPositions
 */
public void mergeWalFiles(List<PreviousWALDetails> parentWals, HashMap<Long, WalPosition> walPositions) {
    Map<Long, Iterator<Map.Entry<Long, WalPosition>>> iteratorsMap = Maps.newHashMap();
    Map<Long, WalPosition> startPositionMap = Maps.newHashMap();
    for (PreviousWALDetails walDetails : parentWals) {
        Iterator<Map.Entry<Long, WalPosition>> it = walDetails.walPositions.entrySet().iterator();
        iteratorsMap.put(walDetails.getWalKey(), it);
        if (walDetails.getCommittedWalPosition() != null) {
            startPositionMap.put(walDetails.getWalKey(), walDetails.getCommittedWalPosition());
        } else {
            startPositionMap.put(walDetails.getWalKey(), new WalPosition(0, 0));
        }
    }

    PriorityQueue<WalWindowPosition> currentValues = new PriorityQueue<>(parentWals.size(),
            new Comparator<WalWindowPosition>() {
                @Override
                public int compare(WalWindowPosition o1, WalWindowPosition o2) {
                    return (int) (o1.windowId - o2.windowId);
                }
            });

    do {
        for (Map.Entry<Long, Iterator<Map.Entry<Long, WalPosition>>> entry : iteratorsMap.entrySet()) {
            if (entry.getValue().hasNext()) {
                Map.Entry<Long, WalPosition> windowWalPosition = entry.getValue().next();
                currentValues.add(new WalWindowPosition(entry.getKey(), windowWalPosition.getKey(),
                        windowWalPosition.getValue()));
            }
        }
        if (!currentValues.isEmpty()) {
            WalWindowPosition minWindowWalEntry = currentValues.remove();
            copyWALFiles(startPositionMap.get(minWindowWalEntry.walKey), minWindowWalEntry.walPosition,
                    minWindowWalEntry.walKey);
            // Set next start position for WAL key
            startPositionMap.put(minWindowWalEntry.walKey, minWindowWalEntry.walPosition);
            // Set end position for windowId for checkpointed positions
            walPositions.put(minWindowWalEntry.windowId, this.getCurrentPosition());
        }
    } while (!currentValues.isEmpty());
}
From source file:org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.java
@Override
public void selectInputStreams(Collection<EditLogInputStream> streams, long fromTxnId, boolean inProgressOk)
        throws IOException {
    QuorumCall<AsyncLogger, RemoteEditLogManifest> q = loggers.getEditLogManifest(fromTxnId, inProgressOk);
    Map<AsyncLogger, RemoteEditLogManifest> resps = loggers.waitForWriteQuorum(q, selectInputStreamsTimeoutMs,
            "selectInputStreams");

    LOG.debug("selectInputStream manifests:\n" + Joiner.on("\n").withKeyValueSeparator(": ").join(resps));

    // Collect streams from every journal node into a queue ordered by the shared edit-log stream comparator.
    final PriorityQueue<EditLogInputStream> allStreams = new PriorityQueue<EditLogInputStream>(64,
            JournalSet.EDIT_LOG_INPUT_STREAM_COMPARATOR);
    for (Map.Entry<AsyncLogger, RemoteEditLogManifest> e : resps.entrySet()) {
        AsyncLogger logger = e.getKey();
        RemoteEditLogManifest manifest = e.getValue();
        for (RemoteEditLog remoteLog : manifest.getLogs()) {
            URL url = logger.buildURLToFetchLogs(remoteLog.getStartTxId());
            EditLogInputStream elis = EditLogFileInputStream.fromUrl(connectionFactory, url,
                    remoteLog.getStartTxId(), remoteLog.getEndTxId(), remoteLog.isInProgress());
            allStreams.add(elis);
        }
    }
    JournalSet.chainAndMakeRedundantStreams(streams, allStreams, fromTxnId);
}
From source file:cn.edu.bjtu.cit.recommender.Recommender.java
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println();
        System.err.println("Usage: " + this.getClass().getName()
                + " [generic options] input output [profiling] [estimation] [clustersize]");
        System.err.println();
        printUsage();
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }
    OptionParser parser = new OptionParser(args);

    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());

    if (parser.hasOption(CLUSTER_SIZE)) {
        pipeline.getConfiguration().setInt(ClusterOracle.CLUSTER_SIZE,
                Integer.parseInt(parser.getOption(CLUSTER_SIZE).getValue()));
    }
    if (parser.hasOption(PROFILING)) {
        pipeline.getConfiguration().setBoolean(Profiler.IS_PROFILE, true);
        this.profileFilePath = parser.getOption(PROFILING).getValue();
    }
    if (parser.hasOption(ESTIMATION)) {
        estFile = parser.getOption(ESTIMATION).getValue();
        est = new Estimator(estFile, clusterSize);
    }
    if (parser.hasOption(OPT_REDUCE)) {
        pipeline.getConfiguration().setBoolean(OPT_REDUCE, true);
    }
    if (parser.hasOption(OPT_MSCR)) {
        pipeline.getConfiguration().setBoolean(OPT_MSCR, true);
    }
    if (parser.hasOption(ACTIVE_THRESHOLD)) {
        threshold = Integer.parseInt(parser.getOption("at").getValue());
    }
    if (parser.hasOption(TOP)) {
        top = Integer.parseInt(parser.getOption("top").getValue());
    }

    profiler = new Profiler(pipeline);

    /*
     * input node
     */
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    if (profiler.isProfiling() && lines.getSize() > 10 * 1024 * 1024) {
        lines = lines.sample(0.1);
    }

    /*
     * S0 + GBK
     */
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {
        @Override
        public Pair<Long, Long> map(String input) {
            String[] split = input.split(Estimator.DELM);
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);
        }

        @Override
        public float scaleFactor() { return est.getScaleFactor("S0").sizeFactor; }

        @Override
        public float scaleFactorByRecord() { return est.getScaleFactor("S0").recsFactor; }
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey(est.getClusterSize());

    /*
     * S1
     */
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                @Override
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        userVector.set((int) itemPref, 1.0f);
                    }
                    return Pair.of(input.first(), userVector);
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S1").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S1").recsFactor; }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    userVector = profiler.profile("S0-S1", pipeline, userVector, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));

    /*
     * S2
     */
    PTable<Long, Vector> filteredUserVector = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, Vector>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Long, Vector>> emitter) {
                    if (input.second().getNumNondefaultElements() > threshold) {
                        emitter.emit(input);
                    }
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S2").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S2").recsFactor; }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    filteredUserVector = profiler.profile("S2", pipeline, filteredUserVector, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));

    /*
     * S3 + GBK
     */
    PGroupedTable<Integer, Integer> coOccurencePairs = filteredUserVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));
                        }
                    }
                }

                @Override
                public float scaleFactor() { float size = est.getScaleFactor("S3").sizeFactor; return size; }

                @Override
                public float scaleFactorByRecord() { float recs = est.getScaleFactor("S3").recsFactor; return recs; }
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey(est.getClusterSize());

    /*
     * S4
     */
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                @Override
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    }
                    return Pair.of(input.first(), cooccurrenceRow);
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S4").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S4").recsFactor; }
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));

    coOccurenceVector = profiler.profile("S3-S4", pipeline, coOccurenceVector, ProfileConverter.int_vector(),
            Writables.tableOf(Writables.ints(), Writables.vectors()));

    /*
     * S5 Wrapping co-occurrence columns
     */
    PTable<Integer, VectorOrPref> wrappedCooccurrence = coOccurenceVector
            .parallelDo(new MapFn<Pair<Integer, Vector>, Pair<Integer, VectorOrPref>>() {
                @Override
                public Pair<Integer, VectorOrPref> map(Pair<Integer, Vector> input) {
                    return Pair.of(input.first(), new VectorOrPref(input.second()));
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S5").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S5").recsFactor; }
            }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

    wrappedCooccurrence = profiler.profile("S5", pipeline, wrappedCooccurrence, ProfileConverter.int_vopv(),
            Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

    /*
     * S6 Splitting user vectors
     */
    PTable<Integer, VectorOrPref> userVectorSplit = filteredUserVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, VectorOrPref>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, VectorOrPref>> emitter) {
                    long userID = input.first();
                    Vector userVector = input.second();
                    Iterator<Vector.Element> it = userVector.iterateNonZero();
                    while (it.hasNext()) {
                        Vector.Element e = it.next();
                        int itemIndex = e.index();
                        float preferenceValue = (float) e.get();
                        emitter.emit(Pair.of(itemIndex, new VectorOrPref(userID, preferenceValue)));
                    }
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S6").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S6").recsFactor; }
            }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

    userVectorSplit = profiler.profile("S6", pipeline, userVectorSplit, ProfileConverter.int_vopp(),
            Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

    /*
     * S7 Combine VectorOrPrefs
     */
    PTable<Integer, VectorAndPrefs> combinedVectorOrPref = wrappedCooccurrence.union(userVectorSplit)
            .groupByKey(est.getClusterSize())
            .parallelDo(new DoFn<Pair<Integer, Iterable<VectorOrPref>>, Pair<Integer, VectorAndPrefs>>() {
                @Override
                public void process(Pair<Integer, Iterable<VectorOrPref>> input,
                        Emitter<Pair<Integer, VectorAndPrefs>> emitter) {
                    Vector vector = null;
                    List<Long> userIDs = Lists.newArrayList();
                    List<Float> values = Lists.newArrayList();
                    for (VectorOrPref vop : input.second()) {
                        if (vector == null) {
                            vector = vop.getVector();
                        }
                        long userID = vop.getUserID();
                        if (userID != Long.MIN_VALUE) {
                            userIDs.add(vop.getUserID());
                        }
                        float value = vop.getValue();
                        if (!Float.isNaN(value)) {
                            values.add(vop.getValue());
                        }
                    }
                    emitter.emit(Pair.of(input.first(), new VectorAndPrefs(vector, userIDs, values)));
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S7").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S7").recsFactor; }
            }, Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));

    combinedVectorOrPref = profiler.profile("S5+S6-S7", pipeline, combinedVectorOrPref,
            ProfileConverter.int_vap(), Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));

    /*
     * S8 Computing partial recommendation vectors
     */
    PTable<Long, Vector> partialMultiply = combinedVectorOrPref
            .parallelDo(new DoFn<Pair<Integer, VectorAndPrefs>, Pair<Long, Vector>>() {
                @Override
                public void process(Pair<Integer, VectorAndPrefs> input, Emitter<Pair<Long, Vector>> emitter) {
                    Vector cooccurrenceColumn = input.second().getVector();
                    List<Long> userIDs = input.second().getUserIDs();
                    List<Float> prefValues = input.second().getValues();
                    for (int i = 0; i < userIDs.size(); i++) {
                        long userID = userIDs.get(i);
                        if (userID != Long.MIN_VALUE) {
                            float prefValue = prefValues.get(i);
                            Vector partialProduct = cooccurrenceColumn.times(prefValue);
                            emitter.emit(Pair.of(userID, partialProduct));
                        }
                    }
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S8").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S8").recsFactor; }
            }, Writables.tableOf(Writables.longs(), Writables.vectors())).groupByKey(est.getClusterSize())
            .combineValues(new CombineFn<Long, Vector>() {
                @Override
                public void process(Pair<Long, Iterable<Vector>> input, Emitter<Pair<Long, Vector>> emitter) {
                    Vector partial = null;
                    for (Vector vector : input.second()) {
                        partial = partial == null ? vector : partial.plus(vector);
                    }
                    emitter.emit(Pair.of(input.first(), partial));
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("combine").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("combine").recsFactor; }
            });

    partialMultiply = profiler.profile("S8-combine", pipeline, partialMultiply, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));

    /*
     * S9 Producing recommendations from vectors
     */
    PTable<Long, RecommendedItems> recommendedItems = partialMultiply
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, RecommendedItems>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Long, RecommendedItems>> emitter) {
                    Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(11,
                            Collections.reverseOrder(BY_PREFERENCE_VALUE));
                    Iterator<Vector.Element> recommendationVectorIterator = input.second().iterateNonZero();
                    while (recommendationVectorIterator.hasNext()) {
                        Vector.Element element = recommendationVectorIterator.next();
                        int index = element.index();
                        float value = (float) element.get();
                        if (topItems.size() < top) {
                            topItems.add(new GenericRecommendedItem(index, value));
                        } else if (value > topItems.peek().getValue()) {
                            topItems.add(new GenericRecommendedItem(index, value));
                            topItems.poll();
                        }
                    }
                    List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
                    recommendations.addAll(topItems);
                    Collections.sort(recommendations, BY_PREFERENCE_VALUE);
                    emitter.emit(Pair.of(input.first(), new RecommendedItems(recommendations)));
                }

                @Override
                public float scaleFactor() { return est.getScaleFactor("S9").sizeFactor; }

                @Override
                public float scaleFactorByRecord() { return est.getScaleFactor("S9").recsFactor; }
            }, Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));

    recommendedItems = profiler.profile("S9", pipeline, recommendedItems, ProfileConverter.long_ri(),
            Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));

    /*
     * Profiling
     */
    if (profiler.isProfiling()) {
        profiler.writeResultToFile(profileFilePath);
        profiler.cleanup(pipeline.getConfiguration());
        return 0;
    }

    /*
     * asText
     */
    pipeline.writeTextFile(recommendedItems, args[1]);
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
}
From source file:org.apache.hama.ml.recommendation.cf.OnlineCF.java
@Override
public List<Pair<Long, Double>> getMostSimilarUsers(long user, int count) {
    Comparator<Pair<Long, Double>> similarityComparator = new Comparator<Pair<Long, Double>>() {
        @Override
        public int compare(Pair<Long, Double> arg0, Pair<Long, Double> arg1) {
            double difference = arg0.getValue().doubleValue() - arg1.getValue().doubleValue();
            return (int) (100000 * difference);
        }
    };

    PriorityQueue<Pair<Long, Double>> queue = new PriorityQueue<Pair<Long, Double>>(count, similarityComparator);
    LinkedList<Pair<Long, Double>> results = new LinkedList<Pair<Long, Double>>();
    for (Long candidateUser : modelUserFactorizedValues.keySet()) {
        double similarity = calculateUserSimilarity(user, candidateUser);
        Pair<Long, Double> targetUser = new Pair<Long, Double>(candidateUser, similarity);
        queue.add(targetUser);
    }

    results.addAll(queue);
    return results;
}
From source file:org.apache.hama.ml.recommendation.cf.OnlineCF.java
@Override
public List<Pair<Long, Double>> getMostSimilarItems(long item, int count) {
    Comparator<Pair<Long, Double>> similarityComparator = new Comparator<Pair<Long, Double>>() {
        @Override
        public int compare(Pair<Long, Double> arg0, Pair<Long, Double> arg1) {
            double difference = arg0.getValue().doubleValue() - arg1.getValue().doubleValue();
            return (int) (100000 * difference);
        }
    };

    PriorityQueue<Pair<Long, Double>> queue = new PriorityQueue<Pair<Long, Double>>(count, similarityComparator);
    LinkedList<Pair<Long, Double>> results = new LinkedList<Pair<Long, Double>>();
    for (Long candidateItem : modelItemFactorizedValues.keySet()) {
        double similarity = calculateItemSimilarity(item, candidateItem);
        Pair<Long, Double> targetItem = new Pair<Long, Double>(candidateItem, similarity);
        queue.add(targetItem);
    }

    results.addAll(queue);
    return results;
}
From source file:com.uber.stream.kafka.mirrormaker.manager.core.ControllerHelixManager.java
public synchronized void updateCurrentStatus() {
    _lock.lock();
    try {
        long currTimeMs = System.currentTimeMillis();
        if (currTimeMs - lastUpdateTimeMs < _conf.getUpdateStatusCoolDownMs()) {
            LOGGER.info("Only {} ms since last updateCurrentStatus, wait for next one",
                    currTimeMs - lastUpdateTimeMs);
            return;
        }
        LOGGER.info("Trying to run controller updateCurrentStatus");

        _workerHelixManager.updateCurrentStatus();

        // Map<InstanceName, InstanceTopicPartitionHolder>
        Map<String, InstanceTopicPartitionHolder> instanceMap = new HashMap<>();
        // Map<TopicName, Map<Pipeline, Instance>>
        Map<String, Map<String, InstanceTopicPartitionHolder>> currTopicToPipelineInstanceMap = new HashMap<>();
        // Map<Pipeline, PriorityQueue<Instance>>
        Map<String, PriorityQueue<InstanceTopicPartitionHolder>> currPipelineToInstanceMap = new HashMap<>();
        // Set<InstanceName>
        List<String> currAvailableControllerList = new ArrayList<>();

        Map<TopicPartition, List<String>> workerRouteToInstanceMap = _workerHelixManager
                .getWorkerRouteToInstanceMap();

        // Map<Instance, Set<Pipeline>> from IdealState
        Map<String, Set<TopicPartition>> instanceToTopicPartitionsMap = HelixUtils
                .getInstanceToTopicPartitionsMap(_helixManager,
                        _srcKafkaValidationManager.getClusterToObserverMap());

        List<String> liveInstances = HelixUtils.liveInstances(_helixManager);
        currAvailableControllerList.addAll(liveInstances);

        int assignedControllerCount = 0;
        for (String instanceId : instanceToTopicPartitionsMap.keySet()) {
            Set<TopicPartition> topicPartitions = instanceToTopicPartitionsMap.get(instanceId);
            // TODO: one instance suppose to have only one route
            for (TopicPartition tp : topicPartitions) {
                String topicName = tp.getTopic();
                if (topicName.startsWith(SEPARATOR)) {
                    currPipelineToInstanceMap.putIfAbsent(topicName, new PriorityQueue<>(1,
                            InstanceTopicPartitionHolder.totalWorkloadComparator(_pipelineWorkloadMap)));
                    InstanceTopicPartitionHolder itph = new InstanceTopicPartitionHolder(instanceId, tp);
                    if (workerRouteToInstanceMap.get(tp) != null) {
                        itph.addWorkers(workerRouteToInstanceMap.get(tp));
                    }
                    currPipelineToInstanceMap.get(topicName).add(itph);
                    instanceMap.put(instanceId, itph);
                    currAvailableControllerList.remove(instanceId);
                    assignedControllerCount++;
                }
            }

            for (TopicPartition tp : topicPartitions) {
                String topicName = tp.getTopic();
                if (!topicName.startsWith(SEPARATOR)) {
                    if (instanceMap.containsKey(instanceId)) {
                        instanceMap.get(instanceId).addTopicPartition(tp);
                        currTopicToPipelineInstanceMap.putIfAbsent(topicName, new ConcurrentHashMap<>());
                        currTopicToPipelineInstanceMap.get(tp.getTopic())
                                .put(getPipelineFromRoute(tp.getPipeline()), instanceMap.get(instanceId));
                    }
                }
            }
        }

        _pipelineToInstanceMap = currPipelineToInstanceMap;
        _topicToPipelineInstanceMap = currTopicToPipelineInstanceMap;
        _availableControllerList = currAvailableControllerList;

        if (_helixManager.isLeader()) {
            _availableController.inc(_availableControllerList.size() - _availableController.getCount());
            _availableWorker
                    .inc(_workerHelixManager.getAvailableWorkerList().size() - _availableWorker.getCount());
            _assignedControllerCount.inc(assignedControllerCount - _assignedControllerCount.getCount());
        }

        // Validation
        validateInstanceToTopicPartitionsMap(instanceToTopicPartitionsMap, instanceMap);

        //LOGGER.info("For controller _pipelineToInstanceMap: {}", _pipelineToInstanceMap);
        //LOGGER.info("For controller _topicToPipelineInstanceMap: {}", _topicToPipelineInstanceMap);
        LOGGER.info("For controller {} available", _availableControllerList.size());

        lastUpdateTimeMs = System.currentTimeMillis();
    } catch (Exception e) {
        LOGGER.error("Got exception in updateCurrentStatus", e);
    } finally {
        _lock.unlock();
    }
}