List of usage examples for com.google.common.collect Multimap entries
Collection<Map.Entry<K, V>> entries();
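Before the project examples, a minimal self-contained sketch (class and values invented for illustration) of what entries() exposes: a collection view with one Map.Entry per key/value pair, including repeated keys, which is what distinguishes it from asMap().

import java.util.Map;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

public class MultimapEntriesDemo {
  public static void main(String[] args) {
    Multimap<String, Integer> scores = ArrayListMultimap.create();
    scores.put("alice", 10);
    scores.put("alice", 12);   // duplicate key is kept; entries() reports both pairs
    scores.put("bob", 7);

    // entries() is a live view of every key-value pair in the multimap
    for (Map.Entry<String, Integer> entry : scores.entries()) {
      System.out.println(entry.getKey() + " -> " + entry.getValue());
    }
  }
}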
From source file:org.obm.domain.dao.CalendarDaoJdbcImpl.java
private void loadEventExceptions(Connection con, AccessToken token, Map<EventObmId, Event> eventById,
    AbstractSQLCollectionHelper<?> eventIds) throws SQLException {
  String query = "SELECT " + EVENT_SELECT_FIELDS + ", eventexception_date as recurrence_id "
      + ", eventexception_parent_id as parent_id FROM Event e "
      + "LEFT JOIN EventCategory1 ON e.event_category1_id=eventcategory1_id "
      + "INNER JOIN Domain ON event_domain_id=domain_id "
      + "INNER JOIN EventEntity ON evententity_event_id=event_id "
      + "INNER JOIN UserObm o ON e.event_owner=o.userobm_id "
      + "INNER JOIN UserObm c ON e.event_usercreate=c.userobm_id "
      + "INNER JOIN EventException ON e.event_id = eventexception_child_id "
      + "WHERE eventexception_parent_id IN (" + eventIds.asPlaceHolders() + ") "
      + "OR eventexception_child_id IN (" + eventIds.asPlaceHolders() + ") ";
  PreparedStatement ps = null;
  ResultSet rs = null;
  String domainName = null;
  Map<EventObmId, Event> evenExcepttById = new HashMap<EventObmId, Event>();
  Multimap<Event, Event> eventChildren = ArrayListMultimap.create();
  List<Event> changedEvent = new LinkedList<Event>();
  Calendar cal = getGMTCalendar();
  try {
    ps = con.prepareStatement(query);
    int nextId = eventIds.insertValues(ps, 1);
    eventIds.insertValues(ps, nextId);
    rs = ps.executeQuery();
    while (rs.next()) {
      Event eventExcept = eventFromCursor(cal, rs);
      cal.setTimeInMillis(rs.getTimestamp("recurrence_id").getTime());
      eventExcept.setRecurrenceId(cal.getTime());
      domainName = rs.getString("domain_name");
      EventObmId eventId = new EventObmId(rs.getInt("parent_id"));
      Event event = eventById.get(eventId);
      if (event != null) {
        eventChildren.put(event, eventExcept);
        changedEvent.add(eventExcept);
        evenExcepttById.put(eventExcept.getObmId(), eventExcept);
      }
    }
    IntegerIndexedSQLCollectionHelper changedIds = new IntegerIndexedSQLCollectionHelper(changedEvent);
    loadAttendees(con, evenExcepttById, domainName);
    loadAlerts(con, token, evenExcepttById, changedIds);
    for (Entry<Event, Event> entry : eventChildren.entries()) {
      entry.getKey().addEventException(entry.getValue());
    }
  } finally {
    obmHelper.cleanup(null, ps, rs);
  }
}
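The essential idiom above is: accumulate child objects under their parent in a Multimap while scanning the result set, then walk entries() once to wire each child to its parent. A minimal sketch of the same pattern, with hypothetical Parent/Child classes standing in for the OBM Event types:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

class Parent {
  final List<Child> exceptions = new ArrayList<>();
  void addEventException(Child c) { exceptions.add(c); }
}

class Child { }

public class AttachChildrenDemo {
  public static void main(String[] args) {
    Parent parent = new Parent();
    Multimap<Parent, Child> eventChildren = ArrayListMultimap.create();
    eventChildren.put(parent, new Child());
    eventChildren.put(parent, new Child());   // a parent may collect several exception events

    // one pass over entries() attaches every child to its parent, as in loadEventExceptions()
    for (Map.Entry<Parent, Child> entry : eventChildren.entries()) {
      entry.getKey().addEventException(entry.getValue());
    }
    System.out.println("attached " + parent.exceptions.size() + " exceptions");
  }
}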
From source file:hu.bme.mit.incquery.querymetrics.QueryOnModelMetrics.java
/**
 * Custom query difficulty: ln(PROD over every enumerable constraint of #matches).
 * Correction: #matches + 1 everywhere.
 * The denominator is only included when relative == true.
 * Disjunctive patterns are forbidden in the relative case; otherwise the maximum over the bodies counts.
 * TODO filterRedundants pattern + inference
 */
public static double calcGabenMetric(Pattern patt, IncQueryEngine engine, boolean relative,
    boolean filterRedundants) throws IncQueryException {
  final EList<PatternBody> bodies = patt.getBodies();
  if (relative && bodies.size() > 1)
    throw new IllegalArgumentException();
  double max = Double.MIN_VALUE;
  final NavigationHelper baseIndex = engine.getBaseIndex();
  for (PatternBody patternBody : bodies) {
    double acc = 0.0;
    final Multimap<Variable, EClassifier> deferredClassifiers = HashMultimap.create();
    final Multimap<Variable, EClassifier> inferredClassifiers = HashMultimap.create();
    final EList<Constraint> constraints = patternBody.getConstraints();
    for (Constraint constraint : constraints) {
      if (constraint instanceof PatternCompositionConstraint) {
        final PatternCompositionConstraint compo = (PatternCompositionConstraint) constraint;
        if (!compo.isNegative()) {
          final PatternCall call = compo.getCall();
          final Pattern patternRef = call.getPatternRef();
          if (!call.isTransitive()) {
            final int count = calcCountMatches(patternRef, engine);
            acc += Math.log(nonZero(count));
          } else {
            throw new UnsupportedOperationException("transitive closure estimate not supported yet");
          }
        }
      } else if (constraint instanceof EClassifierConstraint) {
        final EClassifierConstraint classifierConstraint = (EClassifierConstraint) constraint;
        final Variable variable = classifierConstraint.getVar().getVariable();
        final EntityType type = classifierConstraint.getType();
        if (type instanceof ClassType) {
          final EClassifier classifier = ((ClassType) type).getClassname();
          deferredClassifiers.put(variable, classifier);
        } else
          throw new UnsupportedOperationException("unknown entity type " + type.toString());
      } else if (constraint instanceof PathExpressionConstraint) {
        final PathExpressionHead head = ((PathExpressionConstraint) constraint).getHead();
        for (PathExpressionTail tail = head.getTail(); tail != null; tail = tail.getTail()) {
          final Type type = tail.getType();
          if (type instanceof ReferenceType) {
            EStructuralFeature feature = ((ReferenceType) type).getRefname();
            if (!baseIndex.isInWildcardMode())
              baseIndex.registerEStructuralFeatures(Collections.singleton(feature));
            int count = 0;
            final Collection<EObject> holders = baseIndex.getHoldersOfFeature(feature);
            for (EObject source : holders) {
              count += baseIndex.getFeatureTargets(source, feature).size();
            }
            acc += Math.log(nonZero(count));
            // inference
            if (tail == head.getTail()) {
              final Variable variable = head.getSrc().getVariable();
              final EClass eContainingClass = feature.getEContainingClass();
              inferredClassifiers.put(variable, eContainingClass);
              final EList<EClass> eAllSuperTypes = eContainingClass.getEAllSuperTypes();
              for (EClass superType : eAllSuperTypes) {
                inferredClassifiers.put(variable, superType);
              }
            }
            if (tail.getTail() == null) {
              final ValueReference dst = head.getDst();
              if (dst instanceof VariableReference) {
                final Variable variable = ((VariableReference) dst).getVariable();
                final EClassifier eType = feature.getEType();
                inferredClassifiers.put(variable, eType);
                if (eType instanceof EClass)
                  for (EClass superType : ((EClass) eType).getEAllSuperTypes()) {
                    inferredClassifiers.put(variable, superType);
                  }
              }
            }
          } else
            throw new UnsupportedOperationException(
                "unknown path expression feature type: " + type.getClass().getSimpleName());
        }
      }
    }
    for (Entry<Variable, EClassifier> entry : deferredClassifiers.entries()) {
      final Variable variable = entry.getKey();
      final EClassifier classifier = entry.getValue();
      if (filterRedundants && inferredClassifiers.containsEntry(variable, classifier))
        continue;
      if (classifier instanceof EClass) {
        final EClass clazz = (EClass) classifier;
        if (!baseIndex.isInWildcardMode())
          baseIndex.registerEClasses(Collections.singleton(clazz));
        final int count = baseIndex.getAllInstances(clazz).size();
        acc += Math.log(nonZero(count));
      } else if (classifier instanceof EDataType) {
        final EDataType datatType = (EDataType) classifier;
        if (!baseIndex.isInWildcardMode())
          baseIndex.registerEDataTypes(Collections.singleton(datatType));
        final int count = baseIndex.getDataTypeInstances(datatType).size();
        acc += Math.log(nonZero(count));
      } else
        throw new UnsupportedOperationException(
            "unknown classifier type in ClassType: " + classifier.getClass().getSimpleName());
    }
    if (max < acc)
      max = acc;
  }
  if (relative) {
    final int countMatches = calcCountMatches(patt, engine);
    double base = -Math.log(nonZero(countMatches));
    return base + max;
  } else
    return max;
}
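The Multimap idiom in this example is the combination of entries() with containsEntry(): one multimap collects deferred (variable, classifier) pairs, another collects pairs already inferred elsewhere, and the loop skips any deferred pair that the other multimap already contains. A stripped-down sketch with invented variable and classifier names:

import java.util.Map;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

public class FilterRedundantDemo {
  public static void main(String[] args) {
    Multimap<String, String> deferred = HashMultimap.create();
    deferred.put("v1", "Person");
    deferred.put("v1", "NamedElement");

    Multimap<String, String> inferred = HashMultimap.create();
    inferred.put("v1", "NamedElement");   // already implied by another constraint

    for (Map.Entry<String, String> entry : deferred.entries()) {
      if (inferred.containsEntry(entry.getKey(), entry.getValue())) {
        continue;   // redundant pair, mirrors the filterRedundants branch above
      }
      System.out.println("count instances of " + entry.getValue() + " for variable " + entry.getKey());
    }
  }
}

HashMultimap is the natural choice here because the pairs act as a set: inserting the same (variable, classifier) pair twice must not double-count it.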
From source file:org.eclipse.sirius.common.tools.api.interpreter.JavaExtensionsManager.java
private void reloadEPackages() {
  Multimap<String, EPackage> newDeclarations = HashMultimap.create();
  Set<String> newDeclarersAsBundles = Sets.newLinkedHashSet();
  Collection<EPackageDeclarationSource> ecoreDeclarationSources = this.classLoading
      .findEcoreDeclarations(this.viewpointProjects, this.viewpointPlugins);
  Collection<EPackageDeclarationSource> workspaceDeclarations = Lists.newArrayList();
  for (EPackageLoadingCallback.EPackageDeclarationSource declarer : ecoreDeclarationSources) {
    if (declarer.isBundle()) {
      newDeclarersAsBundles.add(declarer.getSymbolicName());
      for (EPackageDeclaration ePackageDeclaration : declarer.getEPackageDeclarations()) {
        /*
         * the EPackage definition comes from a deployed plugin, we
         * retrieve the EPackage instance to use by getting it from
         * the global registry.
         */
        EPackage pak = EPackage.Registry.INSTANCE.getEPackage(ePackageDeclaration.getNsURI());
        if (pak != null) {
          newDeclarations.put(declarer.getSymbolicName(), pak);
        }
      }
    } else {
      /*
       * we keep that for later as we need to initialize a specific
       * resourceset which will be used by all the subsequent
       * loadings.
       */
      workspaceDeclarations.add(declarer);
    }
  }
  if (workspaceDeclarations.size() > 0) {
    /*
     * this resourceset is being used to load the genmodel instances
     * from the workspace. It is setup with uri mappings so that other
     * Ecore residing in the workspace are shadowing the ones from the
     * targetplatform.
     */
    ResourceSetImpl set = new ResourceSetImpl();
    computePlatformURIMap(set);
    /*
     * the EPackage definition comes from a workspace project, right now
     * we don't explicitely and fully support this use case where the
     * Ecore model lives in the workspace next to the .odesign
     * specification. To properly support this use case we would have to
     * load the corresponding genmodel and register it, making sure we
     * clean all the
     */
    for (EPackageDeclarationSource workspaceSource : workspaceDeclarations) {
      Map<String, EPackage> ecorePackages = Maps.newLinkedHashMap();
      /*
       * a first iteration to populate the map of loaded Ecore
       * packages.
       */
      loadAndFindEPackages(set, workspaceSource, ecorePackages);
      /*
       * a second iteration to declare the EPackages
       */
      for (EPackageDeclaration declaration : workspaceSource.getEPackageDeclarations()) {
        String nsURI = declaration.getNsURI();
        if (!StringUtil.isEmpty(nsURI)) {
          EPackage loaded = ecorePackages.get(nsURI);
          if (loaded != null) {
            newDeclarations.put(nsURI, loaded);
          }
        }
      }
    }
  }
  /*
   * cleaning up previously registered EPackage which are not accessible
   * any more.
   */
  boolean firstRun = lastDeclarerIDsInBundles == null;
  if (!firstRun) {
    for (Entry<String, EPackage> entry : lastDeclarerIDsToEPackages.entries()) {
      boolean changedType = lastDeclarerIDsInBundles.contains(entry.getKey()) != newDeclarersAsBundles
          .contains(entry.getKey());
      if (changedType) {
        unloadedEPackage(entry.getValue());
      }
    }
  }
  for (Entry<String, EPackage> entry : newDeclarations.entries()) {
    boolean changedType = firstRun || lastDeclarerIDsInBundles
        .contains(entry.getKey()) != newDeclarersAsBundles.contains(entry.getKey());
    if (changedType) {
      loadedEPackage(entry.getValue());
    }
  }
  this.lastDeclarerIDsToEPackages = newDeclarations;
  this.lastDeclarerIDsInBundles = newDeclarersAsBundles;
}
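Here entries() is used to compare the previously recorded declarations against the freshly computed ones, firing unload/load callbacks for pairs whose status changed. A simplified sketch of that old-versus-new comparison (not the exact Sirius bookkeeping, which keys the decision on whether a declarer moved between bundle and workspace; bundle names and nsURIs below are invented):

import java.util.Map;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

public class DeclarationDiffDemo {
  public static void main(String[] args) {
    Multimap<String, String> previous = HashMultimap.create();
    previous.put("org.example.bundle", "http://example.org/model/1.0");

    Multimap<String, String> current = HashMultimap.create();
    current.put("org.example.bundle", "http://example.org/model/2.0");

    // pairs that disappeared: iterate the old entries() and test membership in the new map
    for (Map.Entry<String, String> entry : previous.entries()) {
      if (!current.containsEntry(entry.getKey(), entry.getValue())) {
        System.out.println("unloaded: " + entry.getValue());
      }
    }
    // pairs that appeared: iterate the new entries() and test membership in the old map
    for (Map.Entry<String, String> entry : current.entries()) {
      if (!previous.containsEntry(entry.getKey(), entry.getValue())) {
        System.out.println("loaded: " + entry.getValue());
      }
    }
  }
}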
From source file:org.apache.calcite.rel.rules.AbstractMaterializedViewRule.java
/**
 * Rewriting logic is based on "Optimizing Queries Using Materialized Views:
 * A Practical, Scalable Solution" by Goldstein and Larson.
 *
 * <p>On the query side, rules matches a Project-node chain or node, where node
 * is either an Aggregate or a Join. Subplan rooted at the node operator must
 * be composed of one or more of the following operators: TableScan, Project,
 * Filter, and Join.
 *
 * <p>For each join MV, we need to check the following:
 * <ol>
 * <li> The plan rooted at the Join operator in the view produces all rows
 * needed by the plan rooted at the Join operator in the query.</li>
 * <li> All columns required by compensating predicates, i.e., predicates that
 * need to be enforced over the view, are available at the view output.</li>
 * <li> All output expressions can be computed from the output of the view.</li>
 * <li> All output rows occur with the correct duplication factor. We might
 * rely on existing Unique-Key - Foreign-Key relationships to extract that
 * information.</li>
 * </ol>
 *
 * <p>In turn, for each aggregate MV, we need to check the following:
 * <ol>
 * <li> The plan rooted at the Aggregate operator in the view produces all rows
 * needed by the plan rooted at the Aggregate operator in the query.</li>
 * <li> All columns required by compensating predicates, i.e., predicates that
 * need to be enforced over the view, are available at the view output.</li>
 * <li> The grouping columns in the query are a subset of the grouping columns
 * in the view.</li>
 * <li> All columns required to perform further grouping are available in the
 * view output.</li>
 * <li> All columns required to compute output expressions are available in the
 * view output.</li>
 * </ol>
 */
protected void perform(RelOptRuleCall call, Project topProject, RelNode node) {
  final RexBuilder rexBuilder = node.getCluster().getRexBuilder();
  final RelMetadataQuery mq = RelMetadataQuery.instance();
  final RelOptPlanner planner = call.getPlanner();
  final RexSimplify simplify = new RexSimplify(rexBuilder, true,
      planner.getExecutor() != null ? planner.getExecutor() : RexUtil.EXECUTOR);
  final List<RelOptMaterialization> materializations = (planner instanceof VolcanoPlanner)
      ? ((VolcanoPlanner) planner).getMaterializations()
      : ImmutableList.<RelOptMaterialization>of();
  if (!materializations.isEmpty()) {
    // 1. Explore query plan to recognize whether preconditions to
    // try to generate a rewriting are met
    if (!isValidPlan(topProject, node, mq)) {
      return;
    }
    // Obtain applicable (filtered) materializations
    // TODO: Filtering of relevant materializations needs to be
    // improved so we gather only materializations that might
    // actually generate a valid rewriting.
    final List<RelOptMaterialization> applicableMaterializations = RelOptMaterializations
        .getApplicableMaterializations(node, materializations);
    if (!applicableMaterializations.isEmpty()) {
      // 2. Initialize all query related auxiliary data structures
      // that will be used throughout query rewriting process
      // Generate query table references
      final Set<RelTableRef> queryTableRefs = mq.getTableReferences(node);
      if (queryTableRefs == null) {
        // Bail out
        return;
      }
      // Extract query predicates
      final RelOptPredicateList queryPredicateList = mq.getAllPredicates(node);
      if (queryPredicateList == null) {
        // Bail out
        return;
      }
      final RexNode pred = simplify.simplify(
          RexUtil.composeConjunction(rexBuilder, queryPredicateList.pulledUpPredicates, false));
      final Triple<RexNode, RexNode, RexNode> queryPreds = splitPredicates(rexBuilder, pred);
      // Extract query equivalence classes. An equivalence class is a set
      // of columns in the query output that are known to be equal.
      final EquivalenceClasses qEC = new EquivalenceClasses();
      for (RexNode conj : RelOptUtil.conjunctions(queryPreds.getLeft())) {
        assert conj.isA(SqlKind.EQUALS);
        RexCall equiCond = (RexCall) conj;
        qEC.addEquivalenceClass((RexTableInputRef) equiCond.getOperands().get(0),
            (RexTableInputRef) equiCond.getOperands().get(1));
      }
      // 3. We iterate through all applicable materializations trying to
      // rewrite the given query
      for (RelOptMaterialization materialization : applicableMaterializations) {
        final Project topViewProject;
        final RelNode viewNode;
        if (materialization.queryRel instanceof Project) {
          topViewProject = (Project) materialization.queryRel;
          viewNode = topViewProject.getInput();
        } else {
          topViewProject = null;
          viewNode = materialization.queryRel;
        }
        // 3.1. View checks before proceeding
        if (!isValidPlan(topViewProject, viewNode, mq)) {
          // Skip it
          continue;
        }
        // 3.2. Initialize all query related auxiliary data structures
        // that will be used throughout query rewriting process
        // Extract view predicates
        final RelOptPredicateList viewPredicateList = mq.getAllPredicates(viewNode);
        if (viewPredicateList == null) {
          // Skip it
          continue;
        }
        final RexNode viewPred = simplify.simplify(
            RexUtil.composeConjunction(rexBuilder, viewPredicateList.pulledUpPredicates, false));
        final Triple<RexNode, RexNode, RexNode> viewPreds = splitPredicates(rexBuilder, viewPred);
        // Extract view table references
        final Set<RelTableRef> viewTableRefs = mq.getTableReferences(viewNode);
        if (viewTableRefs == null) {
          // Bail out
          return;
        }
        // Extract view tables
        MatchModality matchModality;
        Multimap<RexTableInputRef, RexTableInputRef> compensationEquiColumns = ArrayListMultimap.create();
        if (!queryTableRefs.equals(viewTableRefs)) {
          // We try to compensate, e.g., for join queries it might be
          // possible to join missing tables with view to compute result.
          // Two supported cases: query tables are subset of view tables (we need to
          // check whether they are cardinality-preserving joins), or view tables are
          // subset of query tables (add additional tables through joins if possible)
          if (viewTableRefs.containsAll(queryTableRefs)) {
            matchModality = MatchModality.QUERY_PARTIAL;
            final EquivalenceClasses vEC = new EquivalenceClasses();
            for (RexNode conj : RelOptUtil.conjunctions(viewPreds.getLeft())) {
              assert conj.isA(SqlKind.EQUALS);
              RexCall equiCond = (RexCall) conj;
              vEC.addEquivalenceClass((RexTableInputRef) equiCond.getOperands().get(0),
                  (RexTableInputRef) equiCond.getOperands().get(1));
            }
            if (!compensateQueryPartial(compensationEquiColumns, viewTableRefs, vEC, queryTableRefs)) {
              // Cannot rewrite, skip it
              continue;
            }
          } else if (queryTableRefs.containsAll(viewTableRefs)) {
            // TODO: implement latest case
            matchModality = MatchModality.VIEW_PARTIAL;
            continue;
          } else {
            // Skip it
            continue;
          }
        } else {
          matchModality = MatchModality.COMPLETE;
        }
        // 4. We map every table in the query to a view table with the same qualified
        // name.
        final Multimap<RelTableRef, RelTableRef> multiMapTables = ArrayListMultimap.create();
        for (RelTableRef queryTableRef : queryTableRefs) {
          for (RelTableRef viewTableRef : viewTableRefs) {
            if (queryTableRef.getQualifiedName().equals(viewTableRef.getQualifiedName())) {
              multiMapTables.put(queryTableRef, viewTableRef);
            }
          }
        }
        // If a table is used multiple times, we will create multiple mappings,
        // and we will try to rewrite the query using each of the mappings.
        // Then, we will try to map every source table (query) to a target
        // table (view), and if we are successful, we will try to create
        // compensation predicates to filter the view results further
        // (if needed).
        final List<BiMap<RelTableRef, RelTableRef>> flatListMappings = generateTableMappings(multiMapTables);
        for (BiMap<RelTableRef, RelTableRef> tableMapping : flatListMappings) {
          // 4.0. If compensation equivalence classes exist, we need to add
          // the mapping to the query mapping
          final EquivalenceClasses currQEC = EquivalenceClasses.copy(qEC);
          if (matchModality == MatchModality.QUERY_PARTIAL) {
            for (Entry<RexTableInputRef, RexTableInputRef> e : compensationEquiColumns.entries()) {
              // Copy origin
              RelTableRef queryTableRef = tableMapping.inverse().get(e.getKey().getTableRef());
              RexTableInputRef queryColumnRef = RexTableInputRef.of(queryTableRef,
                  e.getKey().getIndex(), e.getKey().getType());
              // Add to query equivalence classes and table mapping
              currQEC.addEquivalenceClass(queryColumnRef, e.getValue());
              tableMapping.put(e.getValue().getTableRef(), e.getValue().getTableRef()); //identity
            }
          }
          final RexNode compensationColumnsEquiPred;
          final RexNode compensationRangePred;
          final RexNode compensationResidualPred;
          // 4.1. Establish relationship between view and query equivalence classes.
          // If every view equivalence class is not a subset of a query
          // equivalence class, we bail out.
          // To establish relationship, we swap column references of the view predicates
          // to point to query tables. Then, we create the equivalence classes for the
          // view predicates and check that every view equivalence class is a subset of a
          // query equivalence class: if it is not, we bail out.
          final RexNode viewColumnsEquiPred = RexUtil.swapTableReferences(rexBuilder,
              viewPreds.getLeft(), tableMapping.inverse());
          final EquivalenceClasses queryBasedVEC = new EquivalenceClasses();
          for (RexNode conj : RelOptUtil.conjunctions(viewColumnsEquiPred)) {
            assert conj.isA(SqlKind.EQUALS);
            RexCall equiCond = (RexCall) conj;
            queryBasedVEC.addEquivalenceClass((RexTableInputRef) equiCond.getOperands().get(0),
                (RexTableInputRef) equiCond.getOperands().get(1));
          }
          compensationColumnsEquiPred = generateEquivalenceClasses(rexBuilder, currQEC, queryBasedVEC);
          if (compensationColumnsEquiPred == null) {
            // Skip it
            continue;
          }
          // 4.2. We check that range intervals for the query are contained in the view.
          // Compute compensating predicates.
          final RexNode queryRangePred = RexUtil.swapColumnReferences(rexBuilder,
              queryPreds.getMiddle(), currQEC.getEquivalenceClassesMap());
          final RexNode viewRangePred = RexUtil.swapTableColumnReferences(rexBuilder,
              viewPreds.getMiddle(), tableMapping.inverse(), currQEC.getEquivalenceClassesMap());
          compensationRangePred = SubstitutionVisitor.splitFilter(simplify, queryRangePred, viewRangePred);
          if (compensationRangePred == null) {
            // Skip it
            continue;
          }
          // 4.3. Finally, we check that residual predicates of the query are satisfied
          // within the view.
          // Compute compensating predicates.
          final RexNode queryResidualPred = RexUtil.swapColumnReferences(rexBuilder,
              queryPreds.getRight(), currQEC.getEquivalenceClassesMap());
          final RexNode viewResidualPred = RexUtil.swapTableColumnReferences(rexBuilder,
              viewPreds.getRight(), tableMapping.inverse(), currQEC.getEquivalenceClassesMap());
          compensationResidualPred = SubstitutionVisitor.splitFilter(simplify, queryResidualPred, viewResidualPred);
          if (compensationResidualPred == null) {
            // Skip it
            continue;
          }
          // 4.4. Final compensation predicate.
          RexNode compensationPred = RexUtil.composeConjunction(rexBuilder,
              ImmutableList.of(compensationColumnsEquiPred, compensationRangePred, compensationResidualPred),
              false);
          if (!compensationPred.isAlwaysTrue()) {
            // All columns required by compensating predicates must be contained
            // in the view output (condition 2).
            List<RexNode> viewExprs = extractExpressions(topViewProject, viewNode, rexBuilder);
            compensationPred = rewriteExpression(rexBuilder, viewNode, viewExprs, compensationPred,
                tableMapping, currQEC.getEquivalenceClassesMap(), mq);
            if (compensationPred == null) {
              // Skip it
              continue;
            }
          }
          // 4.5. Generate final rewriting if possible.
          // First, we add the compensation predicate (if any) on top of the view.
          // Then, we trigger the Aggregate unifying method. This method will either create
          // a Project or an Aggregate operator on top of the view. It will also compute the
          // output expressions for the query.
          RelBuilder builder = call.builder();
          builder.push(materialization.tableRel);
          if (!compensationPred.isAlwaysTrue()) {
            builder.filter(simplify.simplify(compensationPred));
          }
          RelNode result = unify(rexBuilder, builder, builder.build(), topProject, node,
              topViewProject, viewNode, tableMapping, currQEC.getEquivalenceClassesMap(), mq);
          if (result == null) {
            // Skip it
            continue;
          }
          call.transformTo(result);
        }
      }
    }
  }
}
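Two Multimaps appear in this rule: compensationEquiColumns, whose entries() are replayed into the query's equivalence classes, and multiMapTables, which records every candidate pairing of a query table with a view table of the same qualified name. A table scanned more than once can pair with several view tables, which is exactly why a Multimap is used instead of a Map. A compact sketch of that step, with plain strings standing in for Calcite's RelTableRef (the table names and the "#n" occurrence suffix are invented for illustration):

import java.util.Map;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

public class TableMappingDemo {
  public static void main(String[] args) {
    String[] queryTables = {"SALES.EMP#0", "SALES.EMP#1", "SALES.DEPT#0"};
    String[] viewTables = {"SALES.EMP#0", "SALES.DEPT#0"};

    // one query table may map to several view tables, so collect candidates in a Multimap
    Multimap<String, String> multiMapTables = ArrayListMultimap.create();
    for (String q : queryTables) {
      for (String v : viewTables) {
        if (qualifiedName(q).equals(qualifiedName(v))) {
          multiMapTables.put(q, v);
        }
      }
    }
    // entries() yields every candidate pairing to be expanded into concrete table mappings
    for (Map.Entry<String, String> entry : multiMapTables.entries()) {
      System.out.println(entry.getKey() + " -> " + entry.getValue());
    }
  }

  private static String qualifiedName(String ref) {
    return ref.substring(0, ref.indexOf('#'));   // strip the occurrence suffix
  }
}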
From source file:org.commoncrawl.util.NodeAffinityMaskBuilder.java
public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
    Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList, int maxReducersPerNode,
    boolean skipBalance) throws IOException {
  TreeMap<Integer, String> partitionToNodeMap = new TreeMap<Integer, String>();
  FileStatus paths[] = fileSystem.globStatus(new Path(partFileDirectory, "part-*"));
  if (paths.length == 0) {
    throw new IOException("Invalid source Path:" + partFileDirectory);
  }
  Multimap<String, Integer> inverseMap = TreeMultimap.create();
  Map<Integer, List<String>> paritionToDesiredCandidateList = new TreeMap<Integer, List<String>>();
  // iterate paths
  for (FileStatus path : paths) {
    String currentFile = path.getPath().getName();
    int partitionNumber;
    try {
      if (currentFile.startsWith("part-r")) {
        partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-r-".length())).intValue();
      } else {
        partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-".length())).intValue();
      }
    } catch (ParseException e) {
      throw new IOException("Invalid Part Name Encountered:" + currentFile);
    }
    // get block locations
    BlockLocation locations[] = fileSystem.getFileBlockLocations(path, 0, path.getLen());
    // if passed in root map is not null, then validate that all blocks for the current file reside on the desired node
    if (optionalRootMapHint != null) {
      // the host all blocks should reside on
      String desiredHost = optionalRootMapHint.get(partitionNumber);
      ArrayList<String> misplacedBlocks = new ArrayList<String>();
      // ok walk all blocks
      for (BlockLocation location : locations) {
        boolean found = false;
        for (String host : location.getHosts()) {
          if (host.compareTo(desiredHost) == 0) {
            found = true;
            break;
          }
        }
        if (!found) {
          misplacedBlocks.add("Block At:" + location.getOffset() + " for File:" + path.getPath()
              + " did not contain desired location:" + desiredHost);
        }
      }
      // ok pass test at a certain threshold
      if (misplacedBlocks.size() != 0
          && ((float) misplacedBlocks.size() / (float) locations.length) > .50f) {
        LOG.error("Misplaced Blocks Exceed Threshold");
        for (String misplacedBlock : misplacedBlocks) {
          LOG.error(misplacedBlock);
        }
        // TODO: SKIP THIS STEP FOR NOW ???
        //throw new IOException("Misplaced Blocks Exceed Threshold!");
      }
      partitionToNodeMap.put(partitionNumber, desiredHost);
    } else {
      if (excludedNodeList != null) {
        // LOG.info("Exclued Node List is:" + Lists.newArrayList(excludedNodeList).toString());
      }
      // ok ask file system for block locations
      TreeMap<String, Integer> nodeToBlockCount = new TreeMap<String, Integer>();
      for (BlockLocation location : locations) {
        for (String host : location.getHosts()) {
          if (excludedNodeList == null || !excludedNodeList.contains(host)) {
            Integer nodeHitCount = nodeToBlockCount.get(host);
            if (nodeHitCount == null) {
              nodeToBlockCount.put(host, 1);
            } else {
              nodeToBlockCount.put(host, nodeHitCount.intValue() + 1);
            }
          }
        }
      }
      if (nodeToBlockCount.size() == 0) {
        throw new IOException("No valid nodes found for partition number:" + path);
      }
      Map.Entry<String, Integer> entries[] = nodeToBlockCount.entrySet().toArray(new Map.Entry[0]);
      Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
        @Override
        public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
          return o1.getValue().intValue() < o2.getValue().intValue() ? 1
              : o1.getValue().intValue() == o2.getValue().intValue() ? 0 : -1;
        }
      });
      // build a list of nodes by priority ...
      List<String> nodesByPriority = Lists.transform(Lists.newArrayList(entries),
          new Function<Map.Entry<String, Integer>, String>() {
            @Override
            public String apply(Entry<String, Integer> entry) {
              return entry.getKey();
            }
          });
      // stash it away ...
      paritionToDesiredCandidateList.put(partitionNumber, nodesByPriority);
      //LOG.info("Mapping Partition:" + partitionNumber + " To Node:" + entries[0].getKey() + " BlockCount" + entries[0].getValue().intValue());
      partitionToNodeMap.put(partitionNumber, entries[0].getKey());
      // store the inverse mapping ...
      inverseMap.put(entries[0].getKey(), partitionNumber);
    }
  }
  if (skipBalance) {
    // walk partition map to make sure everything is assigned ...
    /*
    for (String node : inverseMap.keys()) {
      if (inverseMap.get(node).size() > maxReducersPerNode) {
        throw new IOException("Node:" + node + " has too many partitions! (" + inverseMap.get(node).size());
      }
    }
    */
  }
  // now if optional root map hint is null
  if (optionalRootMapHint == null && !skipBalance) {
    // figure out if there is an imbalance
    int avgRegionsPerNode = (int) Math.floor((float) paths.length / (float) inverseMap.keySet().size());
    int maxRegionsPerNode = (int) Math.ceil((float) paths.length / (float) inverseMap.keySet().size());
    LOG.info("Attempting to ideally balance nodes. Avg paritions per node:" + avgRegionsPerNode);
    // two passes ..
    for (int pass = 0; pass < 2; ++pass) {
      LOG.info("Pass:" + pass);
      // iterate nodes ...
      for (String node : ImmutableSet.copyOf(inverseMap.keySet())) {
        // get paritions in map
        Collection<Integer> paritions = ImmutableList.copyOf(inverseMap.get(node));
        // if parition count exceeds desired average ...
        if (paritions.size() > maxRegionsPerNode) {
          // first pass, assign based on preference
          if (pass == 0) {
            LOG.info("Node:" + node + " parition count:" + paritions.size() + " exceeds avg:" + avgRegionsPerNode);
            // walk partitions trying to find a node to discrard the parition to
            for (int partition : paritions) {
              for (String candidate : paritionToDesiredCandidateList.get(partition)) {
                if (!candidate.equals(node)) {
                  // see if this candidate has room ..
                  if (inverseMap.get(candidate).size() < avgRegionsPerNode) {
                    LOG.info("REASSIGNING parition:" + partition + " from Node:" + node + " to Node:" + candidate);
                    // found match reassign it ...
                    inverseMap.remove(node, partition);
                    inverseMap.put(candidate, partition);
                    break;
                  }
                }
              }
              // break out if reach our desired number of paritions for this node
              if (inverseMap.get(node).size() == avgRegionsPerNode)
                break;
            }
          }
          // second pass ... assign based on least loaded node ...
          else {
            int desiredRelocations = paritions.size() - maxRegionsPerNode;
            LOG.info("Desired Relocation for node:" + node + ":" + desiredRelocations + " partitions:" + paritions.size());
            for (int i = 0; i < desiredRelocations; ++i) {
              String leastLoadedNode = null;
              int leastLoadedNodePartitionCount = 0;
              for (String candidateNode : inverseMap.keySet()) {
                if (leastLoadedNode == null
                    || inverseMap.get(candidateNode).size() < leastLoadedNodePartitionCount) {
                  leastLoadedNode = candidateNode;
                  leastLoadedNodePartitionCount = inverseMap.get(candidateNode).size();
                }
              }
              int bestPartition = -1;
              int bestParitionOffset = -1;
              for (int candidateParition : inverseMap.get(node)) {
                int offset = 0;
                for (String nodeCandidate : paritionToDesiredCandidateList.get(candidateParition)) {
                  if (nodeCandidate.equals(leastLoadedNode)) {
                    if (bestPartition == -1 || bestParitionOffset > offset) {
                      bestPartition = candidateParition;
                      bestParitionOffset = offset;
                    }
                    break;
                  }
                  offset++;
                }
              }
              if (bestPartition == -1) {
                bestPartition = Iterables.get(inverseMap.get(node), 0);
              }
              LOG.info("REASSIGNING parition:" + bestPartition + " from Node:" + node + " to Node:" + leastLoadedNode);
              // found match reassign it ...
              inverseMap.remove(node, bestPartition);
              inverseMap.put(leastLoadedNode, bestPartition);
            }
          }
        }
      }
    }
    LOG.info("Rebuilding parition to node map based on ideal balance");
    for (String node : inverseMap.keySet()) {
      LOG.info("Node:" + node + " has:" + inverseMap.get(node).size() + " partitions:" + inverseMap.get(node).toString());
    }
    partitionToNodeMap.clear();
    for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
      partitionToNodeMap.put(entry.getValue(), entry.getKey());
    }
  }
  StringBuilder builder = new StringBuilder();
  int itemCount = 0;
  for (Map.Entry<Integer, String> entry : partitionToNodeMap.entrySet()) {
    if (itemCount++ != 0)
      builder.append("\t");
    builder.append(entry.getKey().intValue() + "," + entry.getValue());
  }
  return builder.toString();
}
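In this example the Multimap holds the inverted assignment (node to the set of partitions it owns) while the balancing passes move partitions around; once balancing is done, a single loop over entries() flattens it back into the partition-to-node map. A minimal sketch of that final inversion step, with invented node names and partition numbers:

import java.util.Map;
import java.util.TreeMap;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;

public class InvertAssignmentDemo {
  public static void main(String[] args) {
    // node -> partitions assigned to it (a node usually owns several partitions)
    Multimap<String, Integer> inverseMap = TreeMultimap.create();
    inverseMap.put("node-a", 0);
    inverseMap.put("node-a", 2);
    inverseMap.put("node-b", 1);

    // flatten back to partition -> node with one loop over entries()
    Map<Integer, String> partitionToNodeMap = new TreeMap<>();
    for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
      partitionToNodeMap.put(entry.getValue(), entry.getKey());
    }
    System.out.println(partitionToNodeMap);   // {0=node-a, 1=node-b, 2=node-a}
  }
}

TreeMultimap keeps both keys and values sorted, which is why the rebuilt map comes out in deterministic order; an ArrayListMultimap would work as well if ordering did not matter.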