List of usage examples for java.util.PriorityQueue.poll()
public E poll()
Retrieves and removes the head of this queue, or returns null if this queue is empty.
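A minimal, self-contained sketch (not taken from any of the source files below) illustrating the poll() contract: elements come back in priority order, and poll() returns null rather than throwing once the queue is empty.

import java.util.PriorityQueue;

public class PollBasics {
    public static void main(String[] args) {
        PriorityQueue<Integer> pq = new PriorityQueue<>();
        pq.add(5);
        pq.add(1);
        pq.add(3);

        // poll() retrieves and removes the smallest element (natural ordering)
        System.out.println(pq.poll()); // 1
        System.out.println(pq.poll()); // 3
        System.out.println(pq.poll()); // 5

        // unlike remove(), poll() does not throw on an empty queue; it returns null
        System.out.println(pq.poll()); // null
    }
}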
From source file: com.koda.integ.hbase.blockcache.OnHeapBlockCache.java
/**
 * Eviction method.
 */
void evict() {
    // Ensure only one eviction at a time
    if (!evictionLock.tryLock())
        return;
    try {
        evictionInProgress = true;
        long currentSize = this.size.get();
        long bytesToFree = currentSize - minSize();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Block cache LRU eviction started; Attempting to free "
                    + StringUtils.byteDesc(bytesToFree) + " of total=" + StringUtils.byteDesc(currentSize));
        }
        if (bytesToFree <= 0)
            return;
        // Instantiate priority buckets
        BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize, singleSize());
        BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize, multiSize());
        BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize, memorySize());
        // Scan entire map putting into appropriate buckets
        for (CachedBlock cachedBlock : map.values()) {
            switch (cachedBlock.getPriority()) {
            case SINGLE: {
                bucketSingle.add(cachedBlock);
                break;
            }
            case MULTI: {
                bucketMulti.add(cachedBlock);
                break;
            }
            case MEMORY: {
                bucketMemory.add(cachedBlock);
                break;
            }
            }
        }
        PriorityQueue<BlockBucket> bucketQueue = new PriorityQueue<BlockBucket>(3);
        bucketQueue.add(bucketSingle);
        bucketQueue.add(bucketMulti);
        bucketQueue.add(bucketMemory);
        int remainingBuckets = 3;
        long bytesFreed = 0;
        BlockBucket bucket;
        while ((bucket = bucketQueue.poll()) != null) {
            long overflow = bucket.overflow();
            if (overflow > 0) {
                long bucketBytesToFree = Math.min(overflow, (bytesToFree - bytesFreed) / remainingBuckets);
                bytesFreed += bucket.free(bucketBytesToFree);
            }
            remainingBuckets--;
        }
        if (LOG.isDebugEnabled()) {
            long single = bucketSingle.totalSize();
            long multi = bucketMulti.totalSize();
            long memory = bucketMemory.totalSize();
            LOG.debug("Block cache LRU eviction completed; " + "freed=" + StringUtils.byteDesc(bytesFreed)
                    + ", " + "total=" + StringUtils.byteDesc(this.size.get()) + ", " + "single="
                    + StringUtils.byteDesc(single) + ", " + "multi=" + StringUtils.byteDesc(multi) + ", "
                    + "memory=" + StringUtils.byteDesc(memory));
        }
    } finally {
        stats.evict();
        evictionInProgress = false;
        evictionLock.unlock();
    }
}
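The PriorityQueue idiom at the heart of evict() is draining the queue with a poll() loop so buckets are processed in priority order until poll() returns null. A simplified, hedged sketch of that drain pattern; the Bucket class, its ordering, and the byte counts are hypothetical stand-ins for the BlockBucket machinery.

import java.util.PriorityQueue;

public class DrainQueueSketch {
    // hypothetical stand-in for BlockBucket: ordered by overflow so the least-overflowing bucket is polled first
    static class Bucket implements Comparable<Bucket> {
        final String name;
        final long overflow;
        Bucket(String name, long overflow) { this.name = name; this.overflow = overflow; }
        @Override
        public int compareTo(Bucket other) { return Long.compare(this.overflow, other.overflow); }
    }

    public static void main(String[] args) {
        PriorityQueue<Bucket> queue = new PriorityQueue<>(3);
        queue.add(new Bucket("single", 300));
        queue.add(new Bucket("multi", 100));
        queue.add(new Bucket("memory", 0));

        long bytesToFree = 250;
        long bytesFreed = 0;
        int remaining = queue.size();
        Bucket bucket;
        // the same drain idiom as evict(): poll() returns null when the queue is empty, ending the loop
        while ((bucket = queue.poll()) != null) {
            long share = Math.min(bucket.overflow, (bytesToFree - bytesFreed) / remaining);
            if (share > 0) {
                bytesFreed += share;
                System.out.println("freeing " + share + " from " + bucket.name);
            }
            remaining--;
        }
        System.out.println("total freed: " + bytesFreed);
    }
}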
From source file: com.uber.stream.kafka.mirrormaker.controller.core.IdealStateBuilder.java
public static IdealState expandCustomRebalanceModeIdealStateFor(IdealState oldIdealState, String topicName,
        int newNumTopicPartitions, ControllerConf controllerConf,
        PriorityQueue<InstanceTopicPartitionHolder> currentServingInstances) {
    final CustomModeISBuilder customModeIdealStateBuilder = new CustomModeISBuilder(topicName);
    customModeIdealStateBuilder.setStateModel(OnlineOfflineStateModel.name)
            .setNumPartitions(newNumTopicPartitions).setNumReplica(1)
            .setMaxPartitionsPerNode(newNumTopicPartitions);
    int numOldPartitions = oldIdealState.getNumPartitions();
    for (int i = 0; i < numOldPartitions; ++i) {
        String partitionName = Integer.toString(i);
        try {
            String instanceName = oldIdealState.getInstanceStateMap(partitionName).keySet().iterator().next();
            customModeIdealStateBuilder.assignInstanceAndState(partitionName, instanceName, "ONLINE");
        } catch (Exception e) {
            // No worker added into the cluster.
        }
    }
    ZkClient zkClient = null;
    String topicPath = "/consumers/" + controllerConf.getGroupId() + "/offsets/" + topicName;
    String consumerOffsetPath = topicPath + "/";
    String zkString = controllerConf.getConsumerCommitZkPath().isEmpty() ? controllerConf.getSrcKafkaZkPath()
            : controllerConf.getConsumerCommitZkPath();
    boolean pathExisted = false;
    if (!StringUtils.isEmpty(zkString)) {
        zkClient = new ZkClient(zkString, 30000, 30000, ZKStringSerializer$.MODULE$);
        try {
            if (!zkClient.exists(topicPath)) {
                zkClient.createPersistent(topicPath);
            }
            pathExisted = true;
        } catch (Exception e) {
            LOGGER.warn("Fails to create path {}", topicPath, e);
        }
    }
    // Assign new partitions to as many workers as possible
    List<InstanceTopicPartitionHolder> instancesForNewPartitions = new ArrayList<>();
    while (instancesForNewPartitions.size() < newNumTopicPartitions - numOldPartitions
            && !currentServingInstances.isEmpty()) {
        instancesForNewPartitions.add(currentServingInstances.poll());
    }
    if (!instancesForNewPartitions.isEmpty()) {
        for (int i = numOldPartitions; i < newNumTopicPartitions; ++i) {
            if (pathExisted) {
                Object obj = zkClient.readData(consumerOffsetPath + i, true);
                if (obj == null) {
                    zkClient.createPersistent(consumerOffsetPath + i, "0");
                    LOGGER.info("Create new zk node " + zkString + consumerOffsetPath + i);
                }
            }
            InstanceTopicPartitionHolder liveInstance = instancesForNewPartitions
                    .get((i - numOldPartitions) % instancesForNewPartitions.size());
            customModeIdealStateBuilder.assignInstanceAndState(Integer.toString(i),
                    liveInstance.getInstanceName(), "ONLINE");
            liveInstance.addTopicPartition(new TopicPartition(topicName, i));
            LOGGER.info("Assign new partition " + topicName + ":" + i + " to instance "
                    + liveInstance.getInstanceName());
        }
        currentServingInstances.addAll(instancesForNewPartitions);
    }
    if (zkClient != null) {
        zkClient.close();
    }
    return customModeIdealStateBuilder.build();
}
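Here poll() is used to take a bounded number of workers off the currentServingInstances queue, and addAll() later returns them so the queue keeps tracking every serving instance. A small hedged sketch of that take-then-return pattern, with plain strings standing in for InstanceTopicPartitionHolder.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class TakeThenReturnSketch {
    public static void main(String[] args) {
        // instances ordered alphabetically here; the real code orders holders by load
        PriorityQueue<String> currentServingInstances = new PriorityQueue<>();
        currentServingInstances.add("worker-2");
        currentServingInstances.add("worker-0");
        currentServingInstances.add("worker-1");

        int newPartitions = 5;
        List<String> instancesForNewPartitions = new ArrayList<>();
        // take at most newPartitions instances, but never more than the queue holds
        while (instancesForNewPartitions.size() < newPartitions && !currentServingInstances.isEmpty()) {
            instancesForNewPartitions.add(currentServingInstances.poll());
        }

        // assign partitions round-robin over the instances we managed to take
        for (int p = 0; p < newPartitions; p++) {
            String instance = instancesForNewPartitions.get(p % instancesForNewPartitions.size());
            System.out.println("partition " + p + " -> " + instance);
        }

        // return the instances so the queue again reflects every serving worker
        currentServingInstances.addAll(instancesForNewPartitions);
        System.out.println("queue size after re-adding: " + currentServingInstances.size());
    }
}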
From source file: classif.ahc.AHCSymbolicSequence.java
public void cluster() {
    // cache all distances
    distances = new double[data.size()][data.size()];
    for (int i = 0; i < data.size(); i++) {
        for (int j = i + 1; j < data.size(); j++) {
            distances[i][j] = data.get(i).distance(data.get(j));
            distances[j][i] = distances[i][j];
        }
    }
    System.out.println("distances cached");
    ArrayList<Integer>[] nClusterID = new ArrayList[data.size()];
    for (int i = 0; i < data.size(); i++) {
        nClusterID[i] = new ArrayList<Integer>();
        nClusterID[i].add(i);
    }
    int nClusters = data.size();
    int nInstances = data.size();
    Node[] clusterNodes = new Node[data.size()];
    PriorityQueue<Tuple> queue = new PriorityQueue<Tuple>(nClusters, new TupleComparator());
    double[][] fDistance0 = new double[nClusters][nClusters];
    for (int i = 0; i < nClusters; i++) {
        fDistance0[i][i] = 0;
        for (int j = i + 1; j < nClusters; j++) {
            fDistance0[i][j] = getDistanceClusters(nClusterID[i], nClusterID[j]);
            fDistance0[j][i] = fDistance0[i][j];
            queue.add(new Tuple(fDistance0[i][j], i, j, 1, 1));
        }
    }
    centroidsForNumberOfClusters = new ArrayList[data.size() + 1];
    centroidsForNumberOfClusters[data.size()] = new ArrayList<Sequence>();
    for (int i = 0; i < data.size(); i++) {
        centroidsForNumberOfClusters[data.size()].add(data.get(i));
    }
    while (nClusters > 1) {
        System.out.println("nClusters left = " + nClusters);
        int iMin1 = -1;
        int iMin2 = -1;
        Tuple t;
        do {
            t = queue.poll();
        } while (t != null && (nClusterID[t.m_iCluster1].size() != t.m_nClusterSize1
                || nClusterID[t.m_iCluster2].size() != t.m_nClusterSize2));
        iMin1 = t.m_iCluster1;
        iMin2 = t.m_iCluster2;
        centroidsForNumberOfClusters[nClusters - 1] =
                (ArrayList<Sequence>) centroidsForNumberOfClusters[nClusters].clone();
        merge(iMin1, iMin2, t.m_fDist, t.m_fDist, nClusterID,
                centroidsForNumberOfClusters[nClusters - 1], clusterNodes, distances);
        for (int i = 0; i < nInstances; i++) {
            if (i != iMin1 && nClusterID[i].size() != 0) {
                int i1 = Math.min(iMin1, i);
                int i2 = Math.max(iMin1, i);
                double fDistance = getDistanceClusters(nClusterID[i1], nClusterID[i2]);
                queue.add(new Tuple(fDistance, i1, i2, nClusterID[i1].size(), nClusterID[i2].size()));
            }
        }
        nClusters--;
    }
    System.out.println("Clustering done for all possible cuts");
}
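A notable detail in cluster() is that stale merge candidates are never removed from the queue; each Tuple records the cluster sizes at insertion time, and the do/while loop keeps polling until it finds a tuple whose recorded sizes still match reality. A hedged sketch of this lazy-deletion idiom; the Entry class and version map are hypothetical simplifications of that size check.

import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;

public class LazyDeletionSketch {
    // hypothetical entry: an item plus the version it was valid for when enqueued
    static class Entry implements Comparable<Entry> {
        final String item;
        final int versionAtInsert;
        final double priority;
        Entry(String item, int versionAtInsert, double priority) {
            this.item = item;
            this.versionAtInsert = versionAtInsert;
            this.priority = priority;
        }
        @Override
        public int compareTo(Entry o) { return Double.compare(this.priority, o.priority); }
    }

    public static void main(String[] args) {
        Map<String, Integer> currentVersion = new HashMap<>();
        currentVersion.put("a", 2); // "a" has been updated since some entries were enqueued
        currentVersion.put("b", 1);

        PriorityQueue<Entry> queue = new PriorityQueue<>();
        queue.add(new Entry("a", 1, 0.5)); // stale: enqueued when "a" was at version 1
        queue.add(new Entry("a", 2, 0.9)); // fresh
        queue.add(new Entry("b", 1, 0.7)); // fresh

        // poll until we find an entry whose recorded version still matches, discarding stale ones
        Entry e;
        do {
            e = queue.poll();
        } while (e != null && currentVersion.get(e.item) != e.versionAtInsert);

        System.out.println(e == null ? "no valid entry" : "best valid entry: " + e.item + " @ " + e.priority);
    }
}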
From source file: edu.snu.leader.hierarchy.simple.Individual.java
/**
 * Finds the nearest neighbors for this individual
 *
 * @param simState
 */
private void findNearestNeighbors(SimulationState simState) {
    _LOG.trace("Entering findNearestNeighbors( simState )");

    // Get the number of nearest neighbors
    _nearestNeighborCount = simState.getNearestNeighborCount();

    // Build a priority queue to sort things for us
    PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<Neighbor>();

    // Iterate through all the individuals
    Iterator<Individual> indIter = simState.getAllIndividuals().iterator();
    while (indIter.hasNext()) {
        // Get the individual
        Individual ind = indIter.next();

        // If it is us, continue on
        if (_id.equals(ind._id)) {
            continue;
        }

        // Build a neighbor out of it and put it in the queue
        Neighbor neighbor = new Neighbor((float) _location.distance(ind._location), ind);
        sortedNeighbors.add(neighbor);
    }

    // Get the "nearest" neighbors
    int count = Math.min(sortedNeighbors.size(), _nearestNeighborCount);
    for (int i = 0; i < count; i++) {
        _nearestNeighbors.add(sortedNeighbors.poll());
    }

    _LOG.trace("Leaving findNearestNeighbors( simState )");
}
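The pattern above simply loads every candidate into a PriorityQueue ordered by distance and polls off the k closest, guarding against polling more elements than exist. A minimal hedged sketch of the same idea, with a local Neighbor class assumed to be comparable by distance.

import java.util.PriorityQueue;

public class KNearestSketch {
    // minimal stand-in for the Neighbor class: ordered by distance, nearest first
    static class Neighbor implements Comparable<Neighbor> {
        final String id;
        final float distance;
        Neighbor(String id, float distance) { this.id = id; this.distance = distance; }
        @Override
        public int compareTo(Neighbor o) { return Float.compare(this.distance, o.distance); }
    }

    public static void main(String[] args) {
        PriorityQueue<Neighbor> sortedNeighbors = new PriorityQueue<>();
        sortedNeighbors.add(new Neighbor("a", 4.2f));
        sortedNeighbors.add(new Neighbor("b", 1.1f));
        sortedNeighbors.add(new Neighbor("c", 2.7f));
        sortedNeighbors.add(new Neighbor("d", 9.0f));

        int nearestNeighborCount = 2;
        // guard with the queue size so we never poll() more elements than exist
        int count = Math.min(sortedNeighbors.size(), nearestNeighborCount);
        for (int i = 0; i < count; i++) {
            Neighbor n = sortedNeighbors.poll();
            System.out.println("neighbor " + n.id + " at distance " + n.distance);
        }
    }
}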
From source file: org.apache.hadoop.hbase.io.hfile.LruBlockCache.java
/**
 * Eviction method.
 */
void evict() {
    // Ensure only one eviction at a time
    if (!evictionLock.tryLock())
        return;
    try {
        evictionInProgress = true;
        long currentSize = this.size.get();
        long bytesToFree = currentSize - minSize();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Block cache LRU eviction started; Attempting to free "
                    + StringUtils.byteDesc(bytesToFree) + " of total=" + StringUtils.byteDesc(currentSize));
        }
        if (bytesToFree <= 0)
            return;
        // Instantiate priority buckets
        BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize, singleSize());
        BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize, multiSize());
        BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize, memorySize());
        // Scan entire map putting into appropriate buckets
        for (CachedBlock cachedBlock : map.values()) {
            switch (cachedBlock.getPriority()) {
            case SINGLE: {
                bucketSingle.add(cachedBlock);
                break;
            }
            case MULTI: {
                bucketMulti.add(cachedBlock);
                break;
            }
            case MEMORY: {
                bucketMemory.add(cachedBlock);
                break;
            }
            }
        }
        long bytesFreed = 0;
        if (forceInMemory || memoryFactor > 0.999f) {
            long s = bucketSingle.totalSize();
            long m = bucketMulti.totalSize();
            if (bytesToFree > (s + m)) {
                // this means we need to evict blocks in memory bucket to make room,
                // so the single and multi buckets will be emptied
                bytesFreed = bucketSingle.free(s);
                bytesFreed += bucketMulti.free(m);
                bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
            } else {
                // this means no need to evict block in memory bucket,
                // and we try best to make the ratio between single-bucket and
                // multi-bucket is 1:2
                long bytesRemain = s + m - bytesToFree;
                if (3 * s <= bytesRemain) {
                    // single-bucket is small enough that no eviction happens for it
                    // hence all eviction goes from multi-bucket
                    bytesFreed = bucketMulti.free(bytesToFree);
                } else if (3 * m <= 2 * bytesRemain) {
                    // multi-bucket is small enough that no eviction happens for it
                    // hence all eviction goes from single-bucket
                    bytesFreed = bucketSingle.free(bytesToFree);
                } else {
                    // both buckets need to evict some blocks
                    bytesFreed = bucketSingle.free(s - bytesRemain / 3);
                    if (bytesFreed < bytesToFree) {
                        bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
                    }
                }
            }
        } else {
            PriorityQueue<BlockBucket> bucketQueue = new PriorityQueue<BlockBucket>(3);
            bucketQueue.add(bucketSingle);
            bucketQueue.add(bucketMulti);
            bucketQueue.add(bucketMemory);
            int remainingBuckets = 3;
            BlockBucket bucket;
            while ((bucket = bucketQueue.poll()) != null) {
                long overflow = bucket.overflow();
                if (overflow > 0) {
                    long bucketBytesToFree = Math.min(overflow, (bytesToFree - bytesFreed) / remainingBuckets);
                    bytesFreed += bucket.free(bucketBytesToFree);
                }
                remainingBuckets--;
            }
        }
        if (LOG.isTraceEnabled()) {
            long single = bucketSingle.totalSize();
            long multi = bucketMulti.totalSize();
            long memory = bucketMemory.totalSize();
            LOG.trace("Block cache LRU eviction completed; " + "freed=" + StringUtils.byteDesc(bytesFreed)
                    + ", " + "total=" + StringUtils.byteDesc(this.size.get()) + ", " + "single="
                    + StringUtils.byteDesc(single) + ", " + "multi=" + StringUtils.byteDesc(multi) + ", "
                    + "memory=" + StringUtils.byteDesc(memory));
        }
    } finally {
        stats.evict();
        evictionInProgress = false;
        evictionLock.unlock();
    }
}
From source file: edu.stanford.cfuller.colocalization3d.correction.PositionCorrector.java
/**
 * Creates a correction from a set of objects whose positions should be the same in each channel.
 *
 * @param imageObjects A Vector containing all the ImageObjects to be used for the correction
 *                     or in the order it appears in a multiwavelength image file.
 * @return A Correction object that can be used to correct the positions of other objects based upon the standards provided.
 */
public Correction getCorrection(java.util.List<ImageObject> imageObjects) {
    int referenceChannel = this.parameters.getIntValueForKey(REF_CH_PARAM);
    int channelToCorrect = this.parameters.getIntValueForKey(CORR_CH_PARAM);
    if (!this.parameters.hasKeyAndTrue(DET_CORR_PARAM)) {
        try {
            return Correction.readFromDisk(FileUtils.getCorrectionFilename(this.parameters));
        } catch (java.io.IOException e) {
            java.util.logging.Logger
                    .getLogger(edu.stanford.cfuller.colocalization3d.Colocalization3DMain.LOGGER_NAME)
                    .severe("Exception encountered while reading correction from disk: ");
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            java.util.logging.Logger
                    .getLogger(edu.stanford.cfuller.colocalization3d.Colocalization3DMain.LOGGER_NAME)
                    .severe("Exception encountered while reading correction from disk: ");
            e.printStackTrace();
        }
        return null;
    }
    int numberOfPointsToFit = this.parameters.getIntValueForKey(NUM_POINT_PARAM);
    RealMatrix correctionX = new Array2DRowRealMatrix(imageObjects.size(), numberOfCorrectionParameters);
    RealMatrix correctionY = new Array2DRowRealMatrix(imageObjects.size(), numberOfCorrectionParameters);
    RealMatrix correctionZ = new Array2DRowRealMatrix(imageObjects.size(), numberOfCorrectionParameters);
    RealVector distanceCutoffs = new ArrayRealVector(imageObjects.size(), 0.0);
    RealVector ones = new ArrayRealVector(numberOfPointsToFit, 1.0);
    RealVector distancesToObjects = new ArrayRealVector(imageObjects.size(), 0.0);
    RealMatrix allCorrectionParametersMatrix = new Array2DRowRealMatrix(numberOfPointsToFit,
            numberOfCorrectionParameters);
    for (int i = 0; i < imageObjects.size(); i++) {
        RealVector ithPos = imageObjects.get(i).getPositionForChannel(referenceChannel);
        for (int j = 0; j < imageObjects.size(); j++) {
            double d = imageObjects.get(j).getPositionForChannel(referenceChannel).subtract(ithPos).getNorm();
            distancesToObjects.setEntry(j, d);
        }
        // the sorting becomes a bottleneck once the number of points gets large
        // reverse comparator so we can use the priority queue and get the max element at the head
        Comparator<Double> cdReverse = new Comparator<Double>() {
            public int compare(Double o1, Double o2) {
                if (o1.equals(o2))
                    return 0;
                if (o1 > o2)
                    return -1;
                return 1;
            }
        };
        PriorityQueue<Double> pq = new PriorityQueue<Double>(numberOfPointsToFit + 2, cdReverse);
        double maxElement = Double.MAX_VALUE;
        for (int p = 0; p < numberOfPointsToFit + 1; p++) {
            pq.add(distancesToObjects.getEntry(p));
        }
        maxElement = pq.peek();
        for (int p = numberOfPointsToFit + 1; p < distancesToObjects.getDimension(); p++) {
            double value = distancesToObjects.getEntry(p);
            if (value < maxElement) {
                pq.poll();
                pq.add(value);
                maxElement = pq.peek();
            }
        }
        double firstExclude = pq.poll();
        double lastDist = pq.poll();
        double distanceCutoff = (lastDist + firstExclude) / 2.0;
        distanceCutoffs.setEntry(i, distanceCutoff);
        RealVector xPositionsToFit = new ArrayRealVector(numberOfPointsToFit, 0.0);
        RealVector yPositionsToFit = new ArrayRealVector(numberOfPointsToFit, 0.0);
        RealVector zPositionsToFit = new ArrayRealVector(numberOfPointsToFit, 0.0);
        RealMatrix differencesToFit = new Array2DRowRealMatrix(numberOfPointsToFit,
                imageObjects.get(0).getPositionForChannel(referenceChannel).getDimension());
        int toFitCounter = 0;
        for (int j = 0; j < imageObjects.size(); j++) {
            if (distancesToObjects.getEntry(j) < distanceCutoff) {
                xPositionsToFit.setEntry(toFitCounter,
                        imageObjects.get(j).getPositionForChannel(referenceChannel).getEntry(0));
                yPositionsToFit.setEntry(toFitCounter,
                        imageObjects.get(j).getPositionForChannel(referenceChannel).getEntry(1));
                zPositionsToFit.setEntry(toFitCounter,
                        imageObjects.get(j).getPositionForChannel(referenceChannel).getEntry(2));
                differencesToFit.setRowVector(toFitCounter, imageObjects.get(j)
                        .getVectorDifferenceBetweenChannels(referenceChannel, channelToCorrect));
                toFitCounter++;
            }
        }
        RealVector x = xPositionsToFit.mapSubtractToSelf(ithPos.getEntry(0));
        RealVector y = yPositionsToFit.mapSubtractToSelf(ithPos.getEntry(1));
        allCorrectionParametersMatrix.setColumnVector(0, ones);
        allCorrectionParametersMatrix.setColumnVector(1, x);
        allCorrectionParametersMatrix.setColumnVector(2, y);
        allCorrectionParametersMatrix.setColumnVector(3, x.map(new Power(2)));
        allCorrectionParametersMatrix.setColumnVector(4, y.map(new Power(2)));
        allCorrectionParametersMatrix.setColumnVector(5, x.ebeMultiply(y));
        DecompositionSolver solver = (new QRDecomposition(allCorrectionParametersMatrix)).getSolver();
        RealVector cX = solver.solve(differencesToFit.getColumnVector(0));
        RealVector cY = solver.solve(differencesToFit.getColumnVector(1));
        RealVector cZ = solver.solve(differencesToFit.getColumnVector(2));
        correctionX.setRowVector(i, cX);
        correctionY.setRowVector(i, cY);
        correctionZ.setRowVector(i, cZ);
    }
    Correction c = new Correction(correctionX, correctionY, correctionZ, distanceCutoffs, imageObjects,
            referenceChannel, channelToCorrect);
    return c;
}
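The PriorityQueue in getCorrection() does bounded selection: a reverse comparator turns the heap into a max-heap, it is primed with the first numberOfPointsToFit + 1 distances, and every later distance smaller than the head replaces the head via poll()/add(); the two final polls then give a cutoff halfway between the last kept and first excluded distance. A short hedged sketch of the underlying k-smallest technique on a plain double array.

import java.util.Comparator;
import java.util.PriorityQueue;

public class KSmallestSketch {
    public static void main(String[] args) {
        double[] distances = { 7.5, 0.3, 4.1, 2.2, 9.8, 1.7, 5.0 };
        int k = 3;

        // reversed comparator turns the min-heap into a max-heap: the largest kept distance sits at the head
        PriorityQueue<Double> pq = new PriorityQueue<>(k + 1, Comparator.<Double>reverseOrder());

        for (int p = 0; p < k; p++) {
            pq.add(distances[p]);
        }
        double maxKept = pq.peek();
        for (int p = k; p < distances.length; p++) {
            if (distances[p] < maxKept) {
                pq.poll();            // drop the largest of the kept distances
                pq.add(distances[p]); // keep the smaller one instead
                maxKept = pq.peek();
            }
        }

        // the heap now holds the k smallest distances; poll() hands them back largest-first
        while (!pq.isEmpty()) {
            System.out.println(pq.poll());
        }
    }
}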
From source file: org.apache.storm.daemon.logviewer.utils.DirectoryCleaner.java
/**
 * If totalSize of files exceeds the either the per-worker quota or global quota,
 * Logviewer deletes oldest inactive log files in a worker directory or in all worker dirs.
 * We use the parameter forPerDir to switch between the two deletion modes.
 *
 * @param dirs the list of directories to be scanned for deletion
 * @param quota the per-dir quota or the total quota for the all directories
 * @param forPerDir if true, deletion happens for a single dir; otherwise, for all directories globally
 * @param activeDirs only for global deletion, we want to skip the active logs in activeDirs
 * @return number of files deleted
 */
public DeletionMeta deleteOldestWhileTooLarge(List<Path> dirs, long quota, boolean forPerDir,
        Set<Path> activeDirs) throws IOException {
    long totalSize = 0;
    for (Path dir : dirs) {
        try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
            for (Path path : stream) {
                totalSize += Files.size(path);
            }
        }
    }
    LOG.debug("totalSize: {} quota: {}", totalSize, quota);
    long toDeleteSize = totalSize - quota;
    if (toDeleteSize <= 0) {
        return DeletionMeta.EMPTY;
    }
    int deletedFiles = 0;
    long deletedSize = 0;
    // the oldest pq_size files in this directory will be placed in PQ, with the newest at the root
    PriorityQueue<Pair<Path, FileTime>> pq = new PriorityQueue<>(PQ_SIZE,
            Comparator.comparing((Pair<Path, FileTime> p) -> p.getRight()).reversed());
    int round = 0;
    final Set<Path> excluded = new HashSet<>();
    while (toDeleteSize > 0) {
        LOG.debug("To delete size is {}, start a new round of deletion, round: {}", toDeleteSize, round);
        for (Path dir : dirs) {
            try (DirectoryStream<Path> stream = getStreamForDirectory(dir)) {
                for (Path path : stream) {
                    if (!excluded.contains(path)) {
                        if (isFileEligibleToSkipDelete(forPerDir, activeDirs, dir, path)) {
                            excluded.add(path);
                        } else {
                            Pair<Path, FileTime> p = Pair.of(path, Files.getLastModifiedTime(path));
                            if (pq.size() < PQ_SIZE) {
                                pq.offer(p);
                            } else if (p.getRight().toMillis() < pq.peek().getRight().toMillis()) {
                                pq.poll();
                                pq.offer(p);
                            }
                        }
                    }
                }
            }
        }
        if (!pq.isEmpty()) {
            // need to reverse the order of elements in PQ to delete files from oldest to newest
            Stack<Pair<Path, FileTime>> stack = new Stack<>();
            while (!pq.isEmpty()) {
                stack.push(pq.poll());
            }
            while (!stack.isEmpty() && toDeleteSize > 0) {
                Pair<Path, FileTime> pair = stack.pop();
                Path file = pair.getLeft();
                final String canonicalPath = file.toAbsolutePath().normalize().toString();
                final long fileSize = Files.size(file);
                final long lastModified = pair.getRight().toMillis();
                // Original implementation doesn't actually check if delete succeeded or not.
                try {
                    Utils.forceDelete(file.toString());
                    LOG.info("Delete file: {}, size: {}, lastModified: {}", canonicalPath, fileSize,
                            lastModified);
                    toDeleteSize -= fileSize;
                    deletedSize += fileSize;
                    deletedFiles++;
                } catch (IOException e) {
                    excluded.add(file);
                }
            }
            pq.clear();
            round++;
            if (round >= MAX_ROUNDS) {
                if (forPerDir) {
                    LOG.warn("Reach the MAX_ROUNDS: {} during per-dir deletion, you may have too many files in "
                            + "a single directory : {}, will delete the rest files in next interval.",
                            MAX_ROUNDS, dirs.get(0).toAbsolutePath().normalize());
                } else {
                    LOG.warn("Reach the MAX_ROUNDS: {} during global deletion, you may have too many files, "
                            + "will delete the rest files in next interval.", MAX_ROUNDS);
                }
                break;
            }
        } else {
            LOG.warn("No more files able to delete this round, but {} is over quota by {} MB",
                    forPerDir ? "this directory" : "root directory", toDeleteSize * 1e-6);
        }
    }
    return new DeletionMeta(deletedSize, deletedFiles);
}
From source file: org.apache.drill.exec.store.mongo.MongoGroupScan.java
@Override
public void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException {
    logger.debug("Incoming endpoints :" + endpoints);
    watch.reset();
    watch.start();
    final int numSlots = endpoints.size();
    int totalAssignmentsTobeDone = chunksMapping.size();
    Preconditions.checkArgument(numSlots <= totalAssignmentsTobeDone,
            String.format("Incoming endpoints %d is greater than number of chunks %d", numSlots,
                    totalAssignmentsTobeDone));
    final int minPerEndpointSlot = (int) Math.floor((double) totalAssignmentsTobeDone / numSlots);
    final int maxPerEndpointSlot = (int) Math.ceil((double) totalAssignmentsTobeDone / numSlots);
    endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
    Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
    for (int i = 0; i < numSlots; ++i) {
        endpointFragmentMapping.put(i, new ArrayList<MongoSubScanSpec>(maxPerEndpointSlot));
        String hostname = endpoints.get(i).getAddress();
        Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
        if (hostIndexQueue == null) {
            hostIndexQueue = Lists.newLinkedList();
            endpointHostIndexListMap.put(hostname, hostIndexQueue);
        }
        hostIndexQueue.add(i);
    }
    Set<Entry<String, List<ChunkInfo>>> chunksToAssignSet = Sets.newHashSet(chunksInverseMapping.entrySet());
    for (Iterator<Entry<String, List<ChunkInfo>>> chunksIterator = chunksToAssignSet.iterator();
            chunksIterator.hasNext();) {
        Entry<String, List<ChunkInfo>> chunkEntry = chunksIterator.next();
        Queue<Integer> slots = endpointHostIndexListMap.get(chunkEntry.getKey());
        if (slots != null) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                Integer slotIndex = slots.poll();
                List<MongoSubScanSpec> subScanSpecList = endpointFragmentMapping.get(slotIndex);
                subScanSpecList.add(buildSubScanSpecAndGet(chunkInfo));
                slots.offer(slotIndex);
            }
            chunksIterator.remove();
        }
    }
    PriorityQueue<List<MongoSubScanSpec>> minHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots,
            LIST_SIZE_COMPARATOR);
    PriorityQueue<List<MongoSubScanSpec>> maxHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots,
            LIST_SIZE_COMPARATOR_REV);
    for (List<MongoSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
        if (listOfScan.size() < minPerEndpointSlot) {
            minHeap.offer(listOfScan);
        } else if (listOfScan.size() > minPerEndpointSlot) {
            maxHeap.offer(listOfScan);
        }
    }
    if (chunksToAssignSet.size() > 0) {
        for (Entry<String, List<ChunkInfo>> chunkEntry : chunksToAssignSet) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                List<MongoSubScanSpec> smallestList = minHeap.poll();
                smallestList.add(buildSubScanSpecAndGet(chunkInfo));
                minHeap.offer(smallestList);
            }
        }
    }
    while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
        List<MongoSubScanSpec> smallestList = minHeap.poll();
        List<MongoSubScanSpec> largestList = maxHeap.poll();
        smallestList.add(largestList.remove(largestList.size() - 1));
        if (largestList.size() > minPerEndpointSlot) {
            maxHeap.offer(largestList);
        }
        if (smallestList.size() < minPerEndpointSlot) {
            minHeap.offer(smallestList);
        }
    }
    logger.debug("Built assignment map in {} s.\nEndpoints: {}.\nAssignment Map: {}",
            watch.elapsed(TimeUnit.NANOSECONDS) / 1000, endpoints, endpointFragmentMapping.toString());
}
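applyAssignments() keeps two PriorityQueues over the same per-endpoint work lists: a min-heap to find the endpoint with the least work and a max-heap to find the one with the most, rebalancing by poll(), mutate, offer(). A small hedged sketch of that rebalancing move using lists of strings; the chunk names and threshold are made up.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class HeapRebalanceSketch {
    public static void main(String[] args) {
        List<String> a = new ArrayList<>(Arrays.asList("c1", "c2", "c3", "c4"));
        List<String> b = new ArrayList<>(Arrays.asList("c5"));
        List<String> c = new ArrayList<>(Arrays.asList("c6", "c7"));
        int minPerSlot = 2;

        Comparator<List<String>> bySize = Comparator.comparingInt(List::size);
        // under-filled lists go in the min-heap, over-filled lists in the max-heap
        PriorityQueue<List<String>> minHeap = new PriorityQueue<>(3, bySize);
        PriorityQueue<List<String>> maxHeap = new PriorityQueue<>(3, bySize.reversed());
        for (List<String> list : Arrays.asList(a, b, c)) {
            if (list.size() < minPerSlot) {
                minHeap.offer(list);
            } else if (list.size() > minPerSlot) {
                maxHeap.offer(list);
            }
        }

        // move work from the fullest list to the emptiest until every list has at least minPerSlot entries
        while (minHeap.peek() != null && minHeap.peek().size() < minPerSlot) {
            List<String> smallest = minHeap.poll();
            List<String> largest = maxHeap.poll();
            smallest.add(largest.remove(largest.size() - 1));
            // offer the lists back only if they still need rebalancing, so the heaps re-evaluate their sizes
            if (largest.size() > minPerSlot) {
                maxHeap.offer(largest);
            }
            if (smallest.size() < minPerSlot) {
                minHeap.offer(smallest);
            }
        }
        System.out.println(a + " " + b + " " + c);
    }
}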
From source file: com.joliciel.csvLearner.maxent.MaxentBestFeatureObserver.java
@Override
public void onTerminate() {
    bestFeaturesPerOutcome = new TreeMap<String, List<NameValuePair>>();
    totalPerOutcome = new TreeMap<String, Double>();
    bestFeatureTotalPerOutcome = new TreeMap<String, Double>();
    filePercentagePerOutcome = new TreeMap<String, Map<String, Double>>();
    fileNames = new TreeSet<String>();
    for (Entry<String, Map<String, Double>> entry : featureMap.entrySet()) {
        String outcome = entry.getKey();
        LOG.debug("outcome: " + outcome);
        Map<String, Double> featureTotals = entry.getValue();
        Map<String, Double> fileTotals = new TreeMap<String, Double>();
        PriorityQueue<NameValuePair> heap = new PriorityQueue<NameValuePair>(featureTotals.size(),
                new NameValueDescendingComparator());
        double grandTotal = 0.0;
        for (Entry<String, Double> featureTotal : featureTotals.entrySet()) {
            NameValuePair pair = new NameValuePair(featureTotal.getKey(), featureTotal.getValue());
            heap.add(pair);
            grandTotal += featureTotal.getValue();
            String featureKey = featureTotal.getKey();
            if (featureKey.contains(CSVLearner.NOMINAL_MARKER))
                featureKey = featureKey.substring(0, featureKey.indexOf(CSVLearner.NOMINAL_MARKER));
            String fileName = this.featureToFileMap.get(featureKey);
            Double fileTotalObj = fileTotals.get(fileName);
            double fileTotal = fileTotalObj == null ? 0 : fileTotalObj.doubleValue();
            fileTotals.put(fileName, fileTotal + featureTotal.getValue());
        }
        List<NameValuePair> bestFeatures = new ArrayList<NameValuePair>();
        double bestFeatureTotal = 0.0;
        for (int i = 0; i < n; i++) {
            NameValuePair pair = heap.poll();
            if (pair == null)
                break;
            LOG.debug("Feature: " + pair.getName() + ", Total: " + pair.getValue());
            bestFeatures.add(pair);
            bestFeatureTotal += pair.getValue();
        }
        bestFeaturesPerOutcome.put(outcome, bestFeatures);
        totalPerOutcome.put(outcome, grandTotal);
        bestFeatureTotalPerOutcome.put(outcome, bestFeatureTotal);
        // convert the file totals to percentages
        for (Entry<String, Double> fileTotal : fileTotals.entrySet()) {
            double filePercentage = fileTotal.getValue() / grandTotal;
            fileTotal.setValue(filePercentage);
            fileNames.add(fileTotal.getKey());
        }
        filePercentagePerOutcome.put(outcome, fileTotals);
        featureTotals.clear();
    }
    featureMap.clear();
    featureMap = null;
}
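onTerminate() selects the n highest-weighted features per outcome by loading every (name, value) pair into a heap with a descending comparator and polling n times, with a null check for the case where fewer than n features exist. A brief hedged sketch of that top-n selection, using Map.Entry in place of NameValuePair.

import java.util.AbstractMap.SimpleEntry;
import java.util.Comparator;
import java.util.Map;
import java.util.PriorityQueue;

public class TopNFeaturesSketch {
    public static void main(String[] args) {
        PriorityQueue<Map.Entry<String, Double>> heap = new PriorityQueue<>(16,
                // descending by value, so the heaviest feature is polled first
                Comparator.comparingDouble((Map.Entry<String, Double> e) -> e.getValue()).reversed());

        heap.add(new SimpleEntry<>("featureA", 0.12));
        heap.add(new SimpleEntry<>("featureB", 0.55));
        heap.add(new SimpleEntry<>("featureC", 0.31));

        int n = 5; // ask for more than we have to show the null guard
        for (int i = 0; i < n; i++) {
            Map.Entry<String, Double> best = heap.poll();
            if (best == null)
                break; // fewer than n features: poll() signals exhaustion with null
            System.out.println(best.getKey() + " -> " + best.getValue());
        }
    }
}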
From source file: com.joliciel.jochre.lexicon.MostLikelyWordChooserImpl.java
public LetterSequence chooseMostLikelyWord(List<LetterSequence> heap, List<LetterSequence> holdoverHeap,
        int n) {
    LetterSequence bestSequence = null;
    List<LetterSequence> holdoverWithDash = new ArrayList<LetterSequence>(n);
    List<LetterSequence> holdoverWithoutDash = new ArrayList<LetterSequence>(n);
    int i = 0;
    for (LetterSequence holdoverSequence : holdoverHeap) {
        if (i >= n)
            break;
        if (holdoverSequence.toString().endsWith("-"))
            holdoverWithDash.add(holdoverSequence);
        else
            holdoverWithoutDash.add(holdoverSequence);
        i++;
    }
    PriorityQueue<LetterSequence> combinedHeap = new PriorityQueue<LetterSequence>();
    for (LetterSequence sequenceWithDash : holdoverWithDash) {
        // find the dash that needs to be skipped at the end of sequence 1
        for (int j = sequenceWithDash.size() - 1; j >= 0; j--) {
            Letter outcome = sequenceWithDash.get(j);
            if (outcome.getString().equals("-")) {
                sequenceWithDash.setDashToSkip(j);
                break;
            }
        }
        for (LetterSequence letterSequence : heap) {
            LetterSequence combinedSequence = this.getLetterGuesserService().getLetterSequence(sequenceWithDash,
                    letterSequence);
            combinedHeap.add(combinedSequence);
        }
    }
    List<LetterSequence> combinedSequences = new ArrayList<LetterSequence>();
    for (i = 0; i < n; i++) {
        if (combinedHeap.isEmpty())
            break;
        combinedSequences.add(combinedHeap.poll());
    }
    if (holdoverWithoutDash.size() == 0) {
        // all holdovers end with a dash
        // therefore we must combine the two sequences
        bestSequence = this.chooseMostLikelyWord(combinedSequences, n);
    } else {
        // some holdovers end with a dash, others don't
        // need to compare combined sequences with individual sequences
        LetterSequence bestCombinedSequence = this.chooseMostLikelyWord(combinedSequences, n);

        // Originally we only included sequences without dashes here
        // However, this falsifies the results towards those without a dash
        // especially in the case where sequence 1 or sequence 2 is also a common word (e.g. der in Yiddish)
        // PriorityQueue<LetterSequence> holdoverHeapWithoutDash = new PriorityQueue<LetterSequence>(holdoverWithoutDash);
        // LetterSequence bestHoldoverSequenceWithoutDash = this.chooseMostLikelyWord(holdoverHeapWithoutDash, n);
        // Changed it to the following:
        LetterSequence bestHoldoverSequence = this.chooseMostLikelyWord(holdoverHeap, n);
        LetterSequence bestNextRowSequence = this.chooseMostLikelyWord(heap, n);

        if (LOG.isDebugEnabled()) {
            LOG.debug("Best combined: " + bestCombinedSequence.toString() + ". Adjusted score: "
                    + bestCombinedSequence.getAdjustedScore());
            LOG.debug("Best seq1 separate: " + bestHoldoverSequence.toString() + ". Adjusted score: "
                    + bestHoldoverSequence.getAdjustedScore());
            LOG.debug("Best seq2 separate: " + bestNextRowSequence.toString() + ". Adjusted score: "
                    + bestNextRowSequence.getAdjustedScore());
        }

        // Now, to compare the best combined with the best separate scores, we need to get a geometric mean
        // of the shapes in the best separate ones, and adjust for the lowest frequency word
        LetterSequence separateSequence = this.letterGuesserService.getLetterSequence(bestHoldoverSequence,
                bestNextRowSequence);
        int minFrequency = bestHoldoverSequence.getFrequency() < bestNextRowSequence.getFrequency()
                ? bestHoldoverSequence.getFrequency()
                : bestNextRowSequence.getFrequency();
        double freqLog = this.getFrequencyAdjustment(minFrequency);
        double separateAdjustedScore = separateSequence.getScore() * freqLog + additiveSmoothing;
        separateSequence.setAdjustedScore(separateAdjustedScore);
        if (LOG.isDebugEnabled())
            LOG.debug("Best separate: " + separateSequence.toString() + ". Score: "
                    + separateSequence.getScore() + ". Freq: " + minFrequency + ". Adjusted: " + freqLog
                    + ". Adjusted score: " + separateSequence.getAdjustedScore());
        if (bestCombinedSequence.getAdjustedScore() > separateAdjustedScore) {
            if (LOG.isDebugEnabled())
                LOG.debug("Using combined sequence");
            bestSequence = bestCombinedSequence;
        } else {
            if (LOG.isDebugEnabled())
                LOG.debug("Using separate sequences");
            bestSequence = this.getLetterGuesserService().getLetterSequence(bestHoldoverSequence,
                    bestNextRowSequence);
        }
        if (LOG.isDebugEnabled())
            LOG.debug("Best with holdover: " + bestSequence.toString());
    }
    return bestSequence;
}