// Java tutorial
/*******************************************************************************
 * Copyright (c) 2013
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    Benjamin Klatt - initial API and implementation and/or initial documentation
 *******************************************************************************/
package org.splevo.diffing.match;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.compare.MatchResource;
import org.eclipse.emf.compare.match.resource.StrategyResourceMatcher;
import org.eclipse.emf.ecore.resource.Resource;
import org.splevo.diffing.util.NormalizationUtil;

import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

/**
 * Resource matcher that pairs left and right resources by their file name and, when several
 * resources share a file name, by the number of trailing path segments their URIs have in common.
 *
 * <p>
 * Optional regular expression based normalizations can be supplied to tolerate renamed files and
 * renamed packages/directories on the left side.
 * </p>
 */
public class HierarchicalStrategyResourceMatcher extends StrategyResourceMatcher {

    /** Index to assign the left resources to their file name. */
    private ListMultimap<String, Resource> filenameResourcesIndexLeft = ArrayListMultimap.create();

    /** Index to assign the right resources to their file name. */
    private ListMultimap<String, Resource> filenameResourcesIndexRight = ArrayListMultimap.create();

    /** Patterns to replace with the defined target string in the joined URI path segments. */
    private LinkedHashMap<Pattern, String> uriNormalizationPatterns = Maps.newLinkedHashMap();

    /** Patterns to replace with the defined target string in the file names (last URI segment). */
    private LinkedHashMap<Pattern, String> filenameNormalizationPatterns = Maps.newLinkedHashMap();

    /**
     * Constructor for default matching strategy without any renaming processing.
     */
    public HierarchicalStrategyResourceMatcher() {
    }

    /**
     * Constructor to specify pattern mappings to handle possible renaming.
     *
     * @param uriNormalizationPatterns
     *            A map with entries having a key representing a regular expression to match and a
     *            replacement string to set in case of a match.
     * @param fileNameNormalizationPatterns
     *            A map with entries having a key of a regular expression to match and a replacement
     *            string to set in case of a match.
     */
    public HierarchicalStrategyResourceMatcher(LinkedHashMap<Pattern, String> uriNormalizationPatterns,
            LinkedHashMap<Pattern, String> fileNameNormalizationPatterns) {
        this.uriNormalizationPatterns = uriNormalizationPatterns;
        this.filenameNormalizationPatterns = fileNameNormalizationPatterns;
    }

    /**
     * Create the mappings between the left and the right resources.
     *
     * <p>
     * Both sides are indexed by file name (the left side additionally by its normalized file
     * names). For each file name:
     * </p>
     * <ul>
     * <li>exactly one left and one right candidate: they are matched directly,</li>
     * <li>several candidates on at least one side: the mutually best path matches are paired,</li>
     * <li>candidates on one side only: nothing is matched.</li>
     * </ul>
     * <p>
     * Every resource still registered in an index afterwards is reported as an unmatched
     * left-only or right-only mapping.
     * </p>
     *
     * @param leftResources
     *            The resources of the left side.
     * @param rightResources
     *            The resources of the right side.
     * @param originResources
     *            The resources of the common origin. Not evaluated by this matcher.
     * @return The resource mappings: matched pairs first, then unmatched left, then unmatched
     *         right resources.
     */
    @Override
    public Iterable<MatchResource> createMappings(Iterator<? extends Resource> leftResources,
            Iterator<? extends Resource> rightResources, Iterator<? extends Resource> originResources) {

        // The indexes are instance state. Reset them so that unmatched resources of an earlier
        // invocation cannot leak into the result of this one.
        filenameResourcesIndexLeft.clear();
        filenameResourcesIndexRight.clear();

        final List<MatchResource> mappings = new ArrayList<MatchResource>();

        indexResources(leftResources, filenameResourcesIndexLeft, filenameNormalizationPatterns);
        indexResources(rightResources, filenameResourcesIndexRight, null);

        // Snapshot of all file names: the indexes are modified while the names are processed.
        Set<String> allSegments = Sets.union(filenameResourcesIndexLeft.keySet(),
                filenameResourcesIndexRight.keySet());
        List<String> allSegmentsCopy = Lists.newArrayList(allSegments);

        for (String segment : allSegmentsCopy) {
            // Copies, because matching removes entries from the underlying indexes.
            List<Resource> leftCandidates = Lists.newArrayList(filenameResourcesIndexLeft.get(segment));
            List<Resource> rightCandidates = Lists.newArrayList(filenameResourcesIndexRight.get(segment));

            if (leftCandidates.isEmpty() || rightCandidates.isEmpty()) {
                continue;
            }

            if (leftCandidates.size() == 1 && rightCandidates.size() == 1) {
                Resource left = leftCandidates.get(0);
                Resource right = rightCandidates.get(0);
                mappings.add(createMatchResource(left, right, null));
                removeFromIndex(filenameResourcesIndexLeft, left);
                removeFromIndex(filenameResourcesIndexRight, right);
            } else {
                matchBestMatches(leftCandidates, rightCandidates, mappings);
            }
        }

        // A resource may be registered under several file names; the set removes the duplicates
        // while preserving the order.
        Collection<Resource> remainingLeftResources = Sets.newLinkedHashSet(filenameResourcesIndexLeft.values());
        for (Resource left : remainingLeftResources) {
            mappings.add(createMatchResource(left, null, null));
        }

        Collection<Resource> remainingRightResources = Sets
                .newLinkedHashSet(filenameResourcesIndexRight.values());
        for (Resource right : remainingRightResources) {
            mappings.add(createMatchResource(null, right, null));
        }

        return mappings;
    }

    /**
     * Create matches for the left and right candidates. A match is only created if a pair is the
     * best match for both sides.
     *
     * Internally, indexes are built to identify the highest match score per resource and the best
     * matching right resources for each left resource.
     *
     * @param leftCandidates
     *            The left candidates to search matches for.
     * @param rightCandidates
     *            The right candidates to search matches for.
     * @param mappings
     *            The list of mappings to fill.
     */
    private void matchBestMatches(List<Resource> leftCandidates, List<Resource> rightCandidates,
            List<MatchResource> mappings) {

        // index for a fast lookup of the highest match score for a resource (left and right).
        Map<Resource, Integer> bestMatchCountIndex = new HashMap<Resource, Integer>();

        // mappings for a left resource to its best matches.
        // This is implemented as multimap to support renaming and derived copies:
        // in such a case, an original resource might map two times, to the still existing same
        // class as well as to the modified, derived copy (see issue SPLEVO-181).
        ListMultimap<Resource, Resource> bestMatchIndexLeft = LinkedListMultimap.create();

        for (Resource leftRes : leftCandidates) {
            for (Resource rightRes : rightCandidates) {

                int matchCount = getMatchingSegmentsPathOnly(leftRes, rightRes);

                Integer leftBest = bestMatchCountIndex.get(leftRes);
                if (leftBest == null || leftBest.intValue() < matchCount) {
                    // new high score: forget the previously best right resources
                    bestMatchCountIndex.put(leftRes, matchCount);
                    bestMatchIndexLeft.removeAll(leftRes);
                    bestMatchIndexLeft.put(leftRes, rightRes);
                } else if (leftBest.intValue() == matchCount) {
                    // tie: remember this right resource as an additional best match
                    bestMatchIndexLeft.put(leftRes, rightRes);
                }

                // For the right side only the high score is required later on.
                Integer rightBest = bestMatchCountIndex.get(rightRes);
                if (rightBest == null || rightBest.intValue() < matchCount) {
                    bestMatchCountIndex.put(rightRes, matchCount);
                }
            }
        }

        mappings.addAll(createMatchElementsForBestMatches(bestMatchCountIndex, bestMatchIndexLeft));
    }

    /**
     * Create match elements for valid best matching pairs.
     *
     * For the best match of each left resource, create a match element if this match-pair is also
     * the best available match for the right resource in the pair. Matched resources are removed
     * from the file name indexes.
     *
     * This supports original resources matched to one or more new resources.<br>
     * This is required to support renaming and derived copies as described in issue SPLEVO-181.
     *
     * TODO: Check if a match should be prevented if it is only 1<br>
     * 1 means only the filename is the same. The resources are expected to be located in relative
     * folders and the URI is an absolute uri. On the other hand, the path to the root folder might
     * be different.<br>
     * subfolderleft/resource.xmi vs. differentsubfolder/resource.xmi<br>
     * vs.<br>
     * rootfolderleft/resource.xmi vs rootfolderright/resource.xmi<br>
     *
     * @param bestMatchCountIndex
     *            The best match qualifiers for each resource (left and right).
     * @param bestMatchIndexLeft
     *            The pairs of best matches for the left resource.
     * @return The valid resource matches identified.
     */
    private List<MatchResource> createMatchElementsForBestMatches(Map<Resource, Integer> bestMatchCountIndex,
            ListMultimap<Resource, Resource> bestMatchIndexLeft) {

        List<MatchResource> mappings = Lists.newArrayList();

        for (Resource leftRes : bestMatchIndexLeft.keySet()) {
            // Compare as primitives: '==' on boxed Integers would compare object identity.
            int leftBest = bestMatchCountIndex.get(leftRes);
            for (Resource rightRes : bestMatchIndexLeft.get(leftRes)) {
                int rightBest = bestMatchCountIndex.get(rightRes);
                if (leftBest == rightBest) {
                    mappings.add(createMatchResource(leftRes, rightRes, null));
                    removeFromIndex(filenameResourcesIndexLeft, leftRes);
                    removeFromIndex(filenameResourcesIndexRight, rightRes);
                }
            }
        }

        return filterDuplicateMappings(mappings);
    }

    /**
     * Filter duplicate matches of the same right resource that result from derived copy matches.
     *
     * Otherwise the EMF Compare engine would register original elements, which are intended to be
     * identified as deleted, as having matches because of the original class that must be present
     * and that they are matched to.
     *
     * @param matches
     *            The matches to filter. The list itself is not modified.
     * @return A new list containing the matches that passed the filter.
     */
    private List<MatchResource> filterDuplicateMappings(List<MatchResource> matches) {

        List<MatchResource> filteredMatches = Lists.newLinkedList(matches);

        // index used to identify duplicate original elements (e.g. for DerivedCopy detection)
        Multimap<Resource, MatchResource> rightMatchedIndex = LinkedHashMultimap.create();
        for (MatchResource match : matches) {
            Resource right = match.getRight();
            if (right != null) {
                rightMatchedIndex.put(right, match);
            }
        }

        // If a right resource is matched more than once, the match whose left resource carries
        // the same file name is dropped.
        // NOTE(review): presumably this keeps the derived copy matched to the original so that
        // its changes are detected - confirm against SPLEVO-181.
        for (Resource right : rightMatchedIndex.keySet()) {
            Collection<MatchResource> matchesOfRight = rightMatchedIndex.get(right);
            if (matchesOfRight.size() <= 1) {
                continue;
            }
            String rightName = right.getURI().lastSegment();
            for (MatchResource match : matchesOfRight) {
                Resource left = (Resource) match.getLeft();
                String leftName = Strings.nullToEmpty(left.getURI().lastSegment());
                if (leftName.equals(rightName)) {
                    filteredMatches.remove(match);
                }
            }
        }
        return filteredMatches;
    }

    /**
     * Remove a resource entry from the index for all segments it has been registered for.
     *
     * @param index
     *            The index to clean.
     * @param resource
     *            The resource to remove
     */
    private void removeFromIndex(ListMultimap<String, Resource> index, Resource resource) {
        // Iterate over a copy: removing the last value of a key modifies the key set.
        List<String> keys = Lists.newArrayList(index.keySet());
        for (String key : keys) {
            index.remove(key, resource);
        }
    }

    /**
     * Count the number of matching segments for two resources.
     *
     * <p>
     * The segment comparison starts from the end of the resources' URIs except of the filename.<br>
     * The filename is expected to match for the resources provided to this method (including
     * renaming awareness etc.). The comparison is done beginning from the end, because the sources
     * of the same software product might be stored at different locations on the disk. So the
     * beginning of the URIs / the locations where the implementations are stored, are expected to
     * be different anyway.
     * </p>
     * <p>
     * Only the segments of the left resource are subject to the URI normalization patterns.
     * </p>
     *
     * @param leftResource
     *            The left resource to compare the uri.
     * @param rightResource
     *            The right resource to compare the uri.
     * @return The number of matching trailing path segments of the resources.
     */
    private int getMatchingSegmentsPathOnly(Resource leftResource, Resource rightResource) {

        URI leftURI = leftResource.getURI();
        URI rightUri = rightResource.getURI();

        String[] segmentsLeft = removeLast(processRenamingNormalizations(leftURI.segments()));
        String[] segmentsRight = removeLast(rightUri.segments());

        int leftLength = segmentsLeft.length;
        int rightLength = segmentsRight.length;
        int comparable = Math.min(leftLength, rightLength);

        // Walk backwards from the directory containing the file until the paths diverge.
        int count = 0;
        while (count < comparable
                && segmentsLeft[leftLength - 1 - count].equals(segmentsRight[rightLength - 1 - count])) {
            count++;
        }
        return count;
    }

    /**
     * Remove the last element of an array.
     *
     * In case of an empty array or null provided, also an empty array will be returned.
     *
     * @param array
     *            The array to remove the last element of.
     * @return A copy of the original array without its last element.
     */
    private String[] removeLast(String[] array) {
        if (array == null || array.length == 0) {
            return new String[] {};
        }
        return Arrays.copyOfRange(array, 0, array.length - 1);
    }

    /**
     * Resources are identified by URIs consisting of segments (directories and files).<br>
     * To apply a java package renaming normalization that also manifests in the source directories,
     * the normalization pattern provided as string containing "." characters must be mapped to the
     * array of URI segment strings.
     *
     * This is done by merging the URI segments with "." as glue character. As a result also the
     * directory segments of the URI representing the absolute path of the source directory are
     * joined with a dot. Later on, when the joined string is split again using "." as split
     * character, the complete string and also the absolute path part is split. As a result, when
     * the absolute path contained a "." character, a split will be performed for that.<br>
     * For example: An eclipse project named "my.first.project" is a single segment, but will be
     * split into three segments when the joined segment string is split again. If this becomes an
     * issue later on, the logic of this method must be adapted. However, any character potentially
     * leads to the same problem as different file systems also allow for different directory names.
     *
     * Note: The last segment representing the filename will be preserved even if it contains a dot
     * e.g. to separate the file extension.
     *
     * @param segmentsLeft
     *            The source array to process.
     * @return The resulting array after processing. A null or empty array is returned unchanged.
     */
    private String[] processRenamingNormalizations(String[] segmentsLeft) {

        // Nothing to normalize, and no file name segment that could be preserved.
        if (segmentsLeft == null || segmentsLeft.length == 0) {
            return segmentsLeft;
        }

        int filenamePosition = segmentsLeft.length - 1;
        String leftFilename = segmentsLeft[filenamePosition];

        StringBuilder joinedPath = new StringBuilder();
        for (int i = 0; i < filenamePosition; i++) {
            if (i > 0) {
                joinedPath.append('.');
            }
            joinedPath.append(segmentsLeft[i]);
        }

        String leftSegmentsAsString = NormalizationUtil.normalizeNamespace(joinedPath.toString(),
                uriNormalizationPatterns);

        List<String> split = Lists.newArrayList(Splitter.on('.').split(leftSegmentsAsString));
        split.add(leftFilename);
        return Iterables.toArray(split, String.class);
    }

    /**
     * Index a set of resources according to their last segment.
     *
     * Each resource is registered with its original file name. In addition, it is registered with
     * every file name that results from applying one of the normalization patterns, if that
     * result differs from the original file name.
     *
     * @param resources
     *            The resources to index.
     * @param index
     *            The index to put them in.
     * @param fileNameNormalizationPatterns
     *            The patterns to apply during resource indexing. Null or an empty map if none
     *            should be applied.
     */
    private void indexResources(Iterator<? extends Resource> resources, ListMultimap<String, Resource> index,
            Map<Pattern, String> fileNameNormalizationPatterns) {

        while (resources.hasNext()) {
            Resource res = resources.next();
            String filename = res.getURI().lastSegment();
            index.put(filename, res);

            // handle renaming; a missing file name cannot be normalized
            if (fileNameNormalizationPatterns == null || filename == null) {
                continue;
            }
            for (Map.Entry<Pattern, String> normalization : fileNameNormalizationPatterns.entrySet()) {
                String newFilename = normalization.getKey().matcher(filename).replaceAll(normalization.getValue());
                if (!filename.equals(newFilename)) {
                    index.put(newFilename, res);
                }
            }
        }
    }
}