List of usage examples for java.util.HashSet.clear()

public void clear()

Removes all of the elements from this set; the set will be empty after this call returns.
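Because clear() empties the set in place while keeping the same instance (and its backing table) alive, the examples below typically call it to reuse a scratch set across loop iterations instead of allocating a new one per iteration. A minimal, self-contained sketch of that pattern (class name and data are illustrative, not taken from the examples):

import java.util.HashSet;
import java.util.Set;

public class ClearExample {
    public static void main(String[] args) {
        // one scratch set, reused for every batch
        Set<String> scratch = new HashSet<String>();
        // illustrative batch data (not from the examples below)
        String[][] batches = { { "a", "b", "b" }, { "b", "c" } };
        for (String[] batch : batches) {
            scratch.clear(); // removes all elements; the set object survives
            for (String item : batch) {
                scratch.add(item);
            }
            System.out.println("distinct items in batch: " + scratch.size());
        }
    }
}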
From source file:org.apache.hadoop.mapred.split.TezMapredSplitsGrouper.java
public InputSplit[] getGroupedSplits(Configuration conf, InputSplit[] originalSplits,
        int desiredNumSplits, String wrappedInputFormatName) throws IOException {
    LOG.info("Grouping splits in Tez");

    int configNumSplits = conf.getInt(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_COUNT, 0);
    if (configNumSplits > 0) {
        // always use config override if specified
        desiredNumSplits = configNumSplits;
        LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
    }

    if (!(configNumSplits > 0 || originalSplits == null || originalSplits.length == 0)) {
        // numSplits has not been overridden by config
        // numSplits has been set at runtime
        // there are splits generated
        // Do sanity checks
        long totalLength = 0;
        for (InputSplit split : originalSplits) {
            totalLength += split.getLength();
        }
        int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.length;
        long lengthPerGroup = totalLength / splitCount;
        long maxLengthPerGroup = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
        long minLengthPerGroup = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
        if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
            throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                    + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
        }
        if (lengthPerGroup > maxLengthPerGroup) {
            // splits too big to work. Need to override with max size.
            int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too small. "
                    + " Desired splitLength: " + lengthPerGroup
                    + " Max splitLength: " + maxLengthPerGroup
                    + " New desired splits: " + newDesiredNumSplits
                    + " Total length: " + totalLength
                    + " Original splits: " + originalSplits.length);
            desiredNumSplits = newDesiredNumSplits;
        } else if (lengthPerGroup < minLengthPerGroup) {
            // splits too small to work. Need to override with size.
            int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too large. "
                    + " Desired splitLength: " + lengthPerGroup
                    + " Min splitLength: " + minLengthPerGroup
                    + " New desired splits: " + newDesiredNumSplits
                    + " Total length: " + totalLength
                    + " Original splits: " + originalSplits.length);
            desiredNumSplits = newDesiredNumSplits;
        }
    }

    if (originalSplits == null) {
        LOG.info("Null original splits");
        return null;
    }

    if (desiredNumSplits == 0 || originalSplits.length == 0 || desiredNumSplits >= originalSplits.length) {
        // nothing set. so return all the splits as is
        LOG.info("Using original number of splits: " + originalSplits.length
                + " desired splits: " + desiredNumSplits);
        InputSplit[] groupedSplits = new TezGroupedSplit[originalSplits.length];
        int i = 0;
        for (InputSplit split : originalSplits) {
            TezGroupedSplit newSplit = new TezGroupedSplit(1, wrappedInputFormatName, split.getLocations());
            newSplit.addSplit(split);
            groupedSplits[i++] = newSplit;
        }
        return groupedSplits;
    }

    String emptyLocation = "EmptyLocation";
    String[] emptyLocations = { emptyLocation };
    List<InputSplit> groupedSplitsList = new ArrayList<InputSplit>(desiredNumSplits);
    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
    // go through splits and add them to locations
    for (InputSplit split : originalSplits) {
        totalLength += split.getLength();
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            distinctLocations.put(location, null);
        }
    }

    long lengthPerGroup = totalLength / desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.length / numNodeLocations;
    int numSplitsInGroup = originalSplits.length / desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
        distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
    }

    Set<String> locSet = new HashSet<String>();
    for (InputSplit split : originalSplits) {
        locSet.clear();
        SplitHolder splitHolder = new SplitHolder(split);
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            locSet.add(location);
        }
        for (String location : locSet) {
            LocationHolder holder = distinctLocations.get(location);
            holder.splits.add(splitHolder);
        }
    }

    boolean groupByLength = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH,
            TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT,
            TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    if (!(groupByLength || groupByCount)) {
        throw new TezUncheckedException("None of the grouping parameters are true: "
                + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH + ", "
                + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits
            + " lengthPerGroup: " + lengthPerGroup
            + " numLocations: " + numNodeLocations
            + " numSplitsPerLocation: " + numSplitsPerLocation
            + " numSplitsInGroup: " + numSplitsInGroup
            + " totalLength: " + totalLength
            + " numOriginalSplits: " + originalSplits.length
            + " . Grouping by length: " + groupByLength + " count: " + groupByCount);

    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitHolder> group = new ArrayList<SplitHolder>(numSplitsInGroup + 1);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    while (splitsProcessed < originalSplits.length) {
        iterations++;
        int numFullGroupsCreated = 0;
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
            group.clear();
            groupLocationSet.clear();
            String location = entry.getKey();
            LocationHolder holder = entry.getValue();
            SplitHolder splitHolder = holder.getUnprocessedHeadSplit();
            if (splitHolder == null) {
                // all splits on node processed
                continue;
            }
            int oldHeadIndex = holder.headIndex;
            long groupLength = 0;
            int groupNumSplits = 0;
            do {
                group.add(splitHolder);
                groupLength += splitHolder.split.getLength();
                groupNumSplits++;
                holder.incrementHeadIndex();
                splitHolder = holder.getUnprocessedHeadSplit();
            } while (splitHolder != null
                    && (!groupByLength || (groupLength + splitHolder.split.getLength() <= lengthPerGroup))
                    && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

            if (holder.isEmpty() && !allowSmallGroups
                    && (!groupByLength || groupLength < lengthPerGroup / 2)
                    && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                // group too small, reset it
                holder.headIndex = oldHeadIndex;
                continue;
            }

            numFullGroupsCreated++;
            // One split group created
            String[] groupLocation = { location };
            if (location == emptyLocation) {
                groupLocation = null;
            } else if (doingRackLocal) {
                for (SplitHolder splitH : group) {
                    String[] locations = splitH.split.getLocations();
                    if (locations != null) {
                        for (String loc : locations) {
                            if (loc != null) {
                                groupLocationSet.add(loc);
                            }
                        }
                    }
                }
                groupLocation = groupLocationSet.toArray(groupLocation);
            }
            TezGroupedSplit groupedSplit = new TezGroupedSplit(group.size(), wrappedInputFormatName,
                    groupLocation,
                    // pass rack local hint directly to AM
                    ((doingRackLocal && location != emptyLocation) ? location : null));
            for (SplitHolder groupedSplitHolder : group) {
                groupedSplit.addSplit(groupedSplitHolder.split);
                Preconditions.checkState(groupedSplitHolder.isProcessed == false,
                        "Duplicates in grouping at location: " + location);
                groupedSplitHolder.isProcessed = true;
                splitsProcessed++;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength()
                        + " split at: " + location);
            }
            groupedSplitsList.add(groupedSplit);
        }

        if (!doingRackLocal && numFullGroupsCreated < 1) {
            // no node could create a node-local group. go rack-local
            doingRackLocal = true;
            // re-create locations
            int numRemainingSplits = originalSplits.length - splitsProcessed;
            Set<InputSplit> remainingSplits = new HashSet<InputSplit>(numRemainingSplits);
            // gather remaining splits.
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                LocationHolder locHolder = entry.getValue();
                while (!locHolder.isEmpty()) {
                    SplitHolder splitHolder = locHolder.getUnprocessedHeadSplit();
                    if (splitHolder != null) {
                        remainingSplits.add(splitHolder.split);
                        locHolder.incrementHeadIndex();
                    }
                }
            }
            if (remainingSplits.size() != numRemainingSplits) {
                throw new TezUncheckedException(
                        "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
            }
            // doing all this now instead of up front because the number of remaining
            // splits is expected to be much smaller
            RackResolver.init(conf);
            Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
            Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
            for (String location : distinctLocations.keySet()) {
                String rack = emptyLocation;
                if (location != emptyLocation) {
                    rack = RackResolver.resolve(location).getNetworkLocation();
                }
                locToRackMap.put(location, rack);
                if (rackLocations.get(rack) == null) {
                    // splits will probably be located in all racks
                    rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                }
            }
            distinctLocations.clear();
            HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
            int numRackSplitsToGroup = remainingSplits.size();
            for (InputSplit split : originalSplits) {
                if (numRackSplitsToGroup == 0) {
                    break;
                }
                // Iterate through the original splits in their order and consider them for grouping.
                // This maintains the original ordering in the list and thus subsequent grouping will
                // maintain that order
                if (!remainingSplits.contains(split)) {
                    continue;
                }
                numRackSplitsToGroup--;
                rackSet.clear();
                SplitHolder splitHolder = new SplitHolder(split);
                String[] locations = split.getLocations();
                if (locations == null || locations.length == 0) {
                    locations = emptyLocations;
                }
                for (String location : locations) {
                    if (location == null) {
                        location = emptyLocation;
                    }
                    rackSet.add(locToRackMap.get(location));
                }
                for (String rack : rackSet) {
                    rackLocations.get(rack).splits.add(splitHolder);
                }
            }
            remainingSplits.clear();
            distinctLocations = rackLocations;
            // adjust split length to be smaller because the data is non local
            float rackSplitReduction = conf.getFloat(
                    TezMapReduceSplitsGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                    TezMapReduceSplitsGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
            if (rackSplitReduction > 0) {
                long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                if (newLengthPerGroup > 0) {
                    lengthPerGroup = newLengthPerGroup;
                }
                if (newNumSplitsInGroup > 0) {
                    numSplitsInGroup = newNumSplitsInGroup;
                }
            }
            LOG.info("Doing rack local after iteration: " + iterations
                    + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated
                    + " totalGroups: " + groupedSplitsList.size()
                    + " lengthPerGroup: " + lengthPerGroup
                    + " numSplitsInGroup: " + numSplitsInGroup);
            // dont do smallGroups for the first pass
            continue;
        }

        if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
            // a few nodes have a lot of data or data is thinly spread across nodes
            // so allow small groups now
            allowSmallGroups = true;
            LOG.info("Allowing small groups after iteration: " + iterations
                    + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated
                    + " totalGroups: " + groupedSplitsList.size());
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Iteration: " + iterations
                    + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated
                    + " totalGroups: " + groupedSplitsList.size());
        }
    }
    InputSplit[] groupedSplits = new InputSplit[groupedSplitsList.size()];
    groupedSplitsList.toArray(groupedSplits);
    LOG.info("Number of splits desired: " + desiredNumSplits
            + " created: " + groupedSplitsList.size()
            + " splitsProcessed: " + splitsProcessed);
    return groupedSplits;
}
From source file:org.apache.hadoop.mapreduce.split.TezMapReduceSplitsGrouper.java
public List<InputSplit> getGroupedSplits(Configuration conf, List<InputSplit> originalSplits,
        int desiredNumSplits, String wrappedInputFormatName) throws IOException, InterruptedException {
    LOG.info("Grouping splits in Tez");

    int configNumSplits = conf.getInt(TEZ_GROUPING_SPLIT_COUNT, 0);
    if (configNumSplits > 0) {
        // always use config override if specified
        desiredNumSplits = configNumSplits;
        LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
    }

    if (!(configNumSplits > 0 || originalSplits == null || originalSplits.size() == 0)) {
        // numSplits has not been overridden by config
        // numSplits has been set at runtime
        // there are splits generated
        // desired splits is less than number of splits generated
        // Do sanity checks
        long totalLength = 0;
        for (InputSplit split : originalSplits) {
            totalLength += split.getLength();
        }
        int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.size();
        long lengthPerGroup = totalLength / splitCount;
        long maxLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MAX_SIZE, TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
        long minLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MIN_SIZE, TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
        if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
            throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                    + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
        }
        if (lengthPerGroup > maxLengthPerGroup) {
            // splits too big to work. Need to override with max size.
            int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too small. "
                    + " Desired splitLength: " + lengthPerGroup
                    + " Max splitLength: " + maxLengthPerGroup
                    + " New desired splits: " + newDesiredNumSplits
                    + " Total length: " + totalLength
                    + " Original splits: " + originalSplits.size());
            desiredNumSplits = newDesiredNumSplits;
        } else if (lengthPerGroup < minLengthPerGroup) {
            // splits too small to work. Need to override with size.
            int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too large. "
                    + " Desired splitLength: " + lengthPerGroup
                    + " Min splitLength: " + minLengthPerGroup
                    + " New desired splits: " + newDesiredNumSplits
                    + " Total length: " + totalLength
                    + " Original splits: " + originalSplits.size());
            desiredNumSplits = newDesiredNumSplits;
        }
    }

    List<InputSplit> groupedSplits = null;

    if (desiredNumSplits == 0 || originalSplits.size() == 0 || desiredNumSplits >= originalSplits.size()) {
        // nothing set. so return all the splits as is
        LOG.info("Using original number of splits: " + originalSplits.size()
                + " desired splits: " + desiredNumSplits);
        groupedSplits = new ArrayList<InputSplit>(originalSplits.size());
        for (InputSplit split : originalSplits) {
            TezGroupedSplit newSplit = new TezGroupedSplit(1, wrappedInputFormatName, split.getLocations());
            newSplit.addSplit(split);
            groupedSplits.add(newSplit);
        }
        return groupedSplits;
    }

    String emptyLocation = "EmptyLocation";
    String[] emptyLocations = { emptyLocation };
    groupedSplits = new ArrayList<InputSplit>(desiredNumSplits);
    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
    // go through splits and add them to locations
    for (InputSplit split : originalSplits) {
        totalLength += split.getLength();
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            distinctLocations.put(location, null);
        }
    }

    long lengthPerGroup = totalLength / desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.size() / numNodeLocations;
    int numSplitsInGroup = originalSplits.size() / desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
        distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
    }

    Set<String> locSet = new HashSet<String>();
    for (InputSplit split : originalSplits) {
        locSet.clear();
        SplitHolder splitHolder = new SplitHolder(split);
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            locSet.add(location);
        }
        for (String location : locSet) {
            LocationHolder holder = distinctLocations.get(location);
            holder.splits.add(splitHolder);
        }
    }

    boolean groupByLength = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_LENGTH, TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_COUNT, TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    if (!(groupByLength || groupByCount)) {
        throw new TezUncheckedException("None of the grouping parameters are true: "
                + TEZ_GROUPING_SPLIT_BY_LENGTH + ", " + TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits
            + " lengthPerGroup: " + lengthPerGroup
            + " numLocations: " + numNodeLocations
            + " numSplitsPerLocation: " + numSplitsPerLocation
            + " numSplitsInGroup: " + numSplitsInGroup
            + " totalLength: " + totalLength
            + " numOriginalSplits: " + originalSplits.size()
            + " . Grouping by length: " + groupByLength + " count: " + groupByCount);

    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitHolder> group = new ArrayList<SplitHolder>(numSplitsInGroup);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    while (splitsProcessed < originalSplits.size()) {
        iterations++;
        int numFullGroupsCreated = 0;
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
            group.clear();
            groupLocationSet.clear();
            String location = entry.getKey();
            LocationHolder holder = entry.getValue();
            SplitHolder splitHolder = holder.getUnprocessedHeadSplit();
            if (splitHolder == null) {
                // all splits on node processed
                continue;
            }
            int oldHeadIndex = holder.headIndex;
            long groupLength = 0;
            int groupNumSplits = 0;
            do {
                group.add(splitHolder);
                groupLength += splitHolder.split.getLength();
                groupNumSplits++;
                holder.incrementHeadIndex();
                splitHolder = holder.getUnprocessedHeadSplit();
            } while (splitHolder != null
                    && (!groupByLength || (groupLength + splitHolder.split.getLength() <= lengthPerGroup))
                    && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

            if (holder.isEmpty() && !allowSmallGroups
                    && (!groupByLength || groupLength < lengthPerGroup / 2)
                    && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                // group too small, reset it
                holder.headIndex = oldHeadIndex;
                continue;
            }

            numFullGroupsCreated++;
            // One split group created
            String[] groupLocation = { location };
            if (location == emptyLocation) {
                groupLocation = null;
            } else if (doingRackLocal) {
                for (SplitHolder splitH : group) {
                    String[] locations = splitH.split.getLocations();
                    if (locations != null) {
                        for (String loc : locations) {
                            if (loc != null) {
                                groupLocationSet.add(loc);
                            }
                        }
                    }
                }
                groupLocation = groupLocationSet.toArray(groupLocation);
            }
            TezGroupedSplit groupedSplit = new TezGroupedSplit(group.size(), wrappedInputFormatName,
                    groupLocation,
                    // pass rack local hint directly to AM
                    ((doingRackLocal && location != emptyLocation) ? location : null));
            for (SplitHolder groupedSplitHolder : group) {
                groupedSplit.addSplit(groupedSplitHolder.split);
                Preconditions.checkState(groupedSplitHolder.isProcessed == false,
                        "Duplicates in grouping at location: " + location);
                groupedSplitHolder.isProcessed = true;
                splitsProcessed++;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength()
                        + " split at: " + location);
            }
            groupedSplits.add(groupedSplit);
        }

        if (!doingRackLocal && numFullGroupsCreated < 1) {
            // no node could create a node-local group. go rack-local
            doingRackLocal = true;
            // re-create locations
            int numRemainingSplits = originalSplits.size() - splitsProcessed;
            Set<InputSplit> remainingSplits = new HashSet<InputSplit>(numRemainingSplits);
            // gather remaining splits.
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                LocationHolder locHolder = entry.getValue();
                while (!locHolder.isEmpty()) {
                    SplitHolder splitHolder = locHolder.getUnprocessedHeadSplit();
                    if (splitHolder != null) {
                        remainingSplits.add(splitHolder.split);
                        locHolder.incrementHeadIndex();
                    }
                }
            }
            if (remainingSplits.size() != numRemainingSplits) {
                throw new TezUncheckedException(
                        "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
            }
            // doing all this now instead of up front because the number of remaining
            // splits is expected to be much smaller
            RackResolver.init(conf);
            Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
            Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
            for (String location : distinctLocations.keySet()) {
                String rack = emptyLocation;
                if (location != emptyLocation) {
                    rack = RackResolver.resolve(location).getNetworkLocation();
                }
                locToRackMap.put(location, rack);
                if (rackLocations.get(rack) == null) {
                    // splits will probably be located in all racks
                    rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                }
            }
            distinctLocations.clear();
            HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
            int numRackSplitsToGroup = remainingSplits.size();
            for (InputSplit split : originalSplits) {
                if (numRackSplitsToGroup == 0) {
                    break;
                }
                // Iterate through the original splits in their order and consider them for grouping.
                // This maintains the original ordering in the list and thus subsequent grouping will
                // maintain that order
                if (!remainingSplits.contains(split)) {
                    continue;
                }
                numRackSplitsToGroup--;
                rackSet.clear();
                SplitHolder splitHolder = new SplitHolder(split);
                String[] locations = split.getLocations();
                if (locations == null || locations.length == 0) {
                    locations = emptyLocations;
                }
                for (String location : locations) {
                    if (location == null) {
                        location = emptyLocation;
                    }
                    rackSet.add(locToRackMap.get(location));
                }
                for (String rack : rackSet) {
                    rackLocations.get(rack).splits.add(splitHolder);
                }
            }
            remainingSplits.clear();
            distinctLocations = rackLocations;
            // adjust split length to be smaller because the data is non local
            float rackSplitReduction = conf.getFloat(TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                    TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
            if (rackSplitReduction > 0) {
                long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                if (newLengthPerGroup > 0) {
                    lengthPerGroup = newLengthPerGroup;
                }
                if (newNumSplitsInGroup > 0) {
                    numSplitsInGroup = newNumSplitsInGroup;
                }
            }
            LOG.info("Doing rack local after iteration: " + iterations
                    + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated
                    + " totalGroups: " + groupedSplits.size()
                    + " lengthPerGroup: " + lengthPerGroup
                    + " numSplitsInGroup: " + numSplitsInGroup);
            // dont do smallGroups for the first pass
            continue;
        }

        if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
            // a few nodes have a lot of data or data is thinly spread across nodes
            // so allow small groups now
            allowSmallGroups = true;
            LOG.info("Allowing small groups after iteration: " + iterations
                    + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated
                    + " totalGroups: " + groupedSplits.size());
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Iteration: " + iterations
                    + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated
                    + " totalGroups: " + groupedSplits.size());
        }
    }
    LOG.info("Number of splits desired: " + desiredNumSplits
            + " created: " + groupedSplits.size()
            + " splitsProcessed: " + splitsProcessed);
    return groupedSplits;
}
From source file:ai.grakn.test.graql.analytics.AnalyticsTest.java
@Test
public void testDegreeIsCorrectAssertionAboutAssertion()
        throws GraknValidationException, ExecutionException, InterruptedException {
    // TODO: Fix on TinkerGraphComputer
    assumeFalse(usingTinker());

    // create a simple graph
    RoleType pet = graph.putRoleType("pet");
    RoleType owner = graph.putRoleType("owner");
    RelationType mansBestFriend = graph.putRelationType("mans-best-friend").hasRole(pet).hasRole(owner);
    RoleType target = graph.putRoleType("target");
    RoleType value = graph.putRoleType("value");
    RelationType hasName = graph.putRelationType("has-name").hasRole(value).hasRole(target);
    EntityType person = graph.putEntityType("person").playsRole(owner);
    EntityType animal = graph.putEntityType("animal").playsRole(pet).playsRole(target);
    ResourceType<String> name = graph.putResourceType("name", ResourceType.DataType.STRING).playsRole(value);
    ResourceType<String> altName = graph.putResourceType("alternate-name", ResourceType.DataType.STRING)
            .playsRole(value);
    RoleType ownership = graph.putRoleType("ownership");
    RoleType ownershipResource = graph.putRoleType("ownership-resource");
    RelationType hasOwnershipResource = graph.putRelationType("has-ownership-resource").hasRole(ownership)
            .hasRole(ownershipResource);
    ResourceType<String> startDate = graph.putResourceType("start-date", ResourceType.DataType.STRING)
            .playsRole(ownershipResource);
    mansBestFriend.playsRole(ownership);

    // add data to the graph
    Entity coco = animal.addEntity();
    Entity dave = person.addEntity();
    Resource coconut = name.putResource("coconut");
    Resource stinky = altName.putResource("stinky");
    Relation daveOwnsCoco = mansBestFriend.addRelation().putRolePlayer(owner, dave).putRolePlayer(pet, coco);
    hasName.addRelation().putRolePlayer(target, coco).putRolePlayer(value, coconut);
    hasName.addRelation().putRolePlayer(target, coco).putRolePlayer(value, stinky);
    Resource sd = startDate.putResource("01/01/01");
    Relation ownsFrom = hasOwnershipResource.addRelation().putRolePlayer(ownershipResource, sd)
            .putRolePlayer(ownership, daveOwnsCoco);

    // manually compute the degree
    Map<String, Long> referenceDegrees1 = new HashMap<>();
    referenceDegrees1.put(coco.getId(), 1L);
    referenceDegrees1.put(dave.getId(), 1L);
    referenceDegrees1.put(daveOwnsCoco.getId(), 3L);
    referenceDegrees1.put(sd.getId(), 1L);
    referenceDegrees1.put(ownsFrom.getId(), 2L);

    // manually compute degrees
    Map<String, Long> referenceDegrees2 = new HashMap<>();
    referenceDegrees2.put(coco.getId(), 1L);
    referenceDegrees2.put(dave.getId(), 1L);
    referenceDegrees2.put(daveOwnsCoco.getId(), 2L);

    graph.commit();

    // create a subgraph with assertion on assertion
    HashSet<String> ct = Sets.newHashSet("animal", "person", "mans-best-friend", "start-date",
            "has-ownership-resource");
    Analytics computer = new Analytics(graph.getKeyspace(), ct, new HashSet<>());
    Map<Long, Set<String>> degrees = computer.degrees();
    assertTrue(!degrees.isEmpty());
    degrees.entrySet().forEach(entry -> entry.getValue().forEach(id -> {
        assertTrue(referenceDegrees1.containsKey(id));
        assertEquals(referenceDegrees1.get(id), entry.getKey());
    }));

    // create subgraph without assertion on assertion
    ct.clear();
    ct.add("animal");
    ct.add("person");
    ct.add("mans-best-friend");
    computer = new Analytics(graph.getKeyspace(), ct, new HashSet<>());
    degrees = computer.degrees();
    assertFalse(degrees.isEmpty());
    degrees.entrySet().forEach(entry -> entry.getValue().forEach(id -> {
        assertTrue(referenceDegrees2.containsKey(id));
        assertEquals(referenceDegrees2.get(id), entry.getKey());
    }));
}
From source file:org.jumpmind.db.model.Database.java
/**
 * Initializes the model by establishing the relationships between elements
 * in this model encoded e.g. in foreign keys etc. Also checks that the model
 * elements are valid (table and columns have a name, foreign keys reference
 * existing tables etc.)
 */
public void initialize() throws ModelException {
    // we have to setup
    // * target tables in foreign keys
    // * columns in foreign key references
    // * columns in indices
    // * columns in uniques
    HashSet<String> namesOfProcessedTables = new HashSet<String>();
    HashSet<String> namesOfProcessedColumns = new HashSet<String>();
    HashSet<String> namesOfProcessedFks = new HashSet<String>();
    HashSet<String> namesOfProcessedIndices = new HashSet<String>();
    int tableIdx = 0;

    for (Iterator<Table> tableIt = tables.iterator(); tableIt.hasNext(); tableIdx++) {
        Table curTable = tableIt.next();

        if ((curTable.getName() == null) || (curTable.getName().length() == 0)) {
            throw new ModelException("The table nr. " + tableIdx + " has no name");
        }
        if (namesOfProcessedTables.contains(curTable.getFullyQualifiedTableName())) {
            throw new ModelException("There are multiple tables with the name " + curTable.getName());
        }
        namesOfProcessedTables.add(curTable.getFullyQualifiedTableName());
        // reuse the per-table scratch sets for the next table
        namesOfProcessedColumns.clear();
        namesOfProcessedFks.clear();
        namesOfProcessedIndices.clear();

        for (int idx = 0; idx < curTable.getColumnCount(); idx++) {
            Column column = curTable.getColumn(idx);

            if ((column.getName() == null) || (column.getName().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no name");
            }
            if (namesOfProcessedColumns.contains(column.getName())) {
                throw new ModelException("There are multiple column with the name " + column.getName()
                        + " in the table " + curTable.getName());
            }
            namesOfProcessedColumns.add(column.getName());

            if ((column.getMappedType() == null) || (column.getMappedType().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no type");
            }
            if ((column.getMappedTypeCode() == Types.OTHER)
                    && !"OTHER".equalsIgnoreCase(column.getMappedType())) {
                throw new ModelException("The column nr. " + idx + " in table " + curTable.getName()
                        + " has an unknown type " + column.getMappedType());
            }
            namesOfProcessedColumns.add(column.getName());
        }

        for (int idx = 0; idx < curTable.getForeignKeyCount(); idx++) {
            ForeignKey fk = curTable.getForeignKey(idx);
            String fkName = (fk.getName() == null ? "" : fk.getName());
            String fkDesc = (fkName.length() == 0 ? "nr. " + idx : fkName);

            if (fkName.length() > 0) {
                if (namesOfProcessedFks.contains(fkName)) {
                    throw new ModelException("There are multiple foreign keys in table "
                            + curTable.getName() + " with the name " + fkName);
                }
                namesOfProcessedFks.add(fkName);
            }

            if (fk.getForeignTable() == null) {
                Table targetTable = findTable(fk.getForeignTableName(), true);

                if (targetTable != null) {
                    fk.setForeignTable(targetTable);
                } else {
                    log.debug("The foreignkey " + fkDesc + " in table " + curTable.getName()
                            + " references the undefined table " + fk.getForeignTableName()
                            + ". This could be because the foreign key table was in another"
                            + " schema which is a bug that should be fixed in the future.");
                }
            }
            if (fk.getForeignTable() != null) {
                for (int refIdx = 0; refIdx < fk.getReferenceCount(); refIdx++) {
                    Reference ref = fk.getReference(refIdx);

                    if (ref.getLocalColumn() == null) {
                        Column localColumn = curTable.findColumn(ref.getLocalColumnName(), true);

                        if (localColumn == null) {
                            throw new ModelException("The foreignkey " + fkDesc + " in table "
                                    + curTable.getName() + " references the undefined local column "
                                    + ref.getLocalColumnName());
                        } else {
                            ref.setLocalColumn(localColumn);
                        }
                    }
                    if (ref.getForeignColumn() == null) {
                        Column foreignColumn = fk.getForeignTable().findColumn(ref.getForeignColumnName(),
                                true);

                        if (foreignColumn == null) {
                            throw new ModelException("The foreignkey " + fkDesc + " in table "
                                    + curTable.getName() + " references the undefined local column "
                                    + ref.getForeignColumnName() + " in table "
                                    + fk.getForeignTable().getName());
                        } else {
                            ref.setForeignColumn(foreignColumn);
                        }
                    }
                }
            }
        }

        for (int idx = 0; idx < curTable.getIndexCount(); idx++) {
            IIndex index = curTable.getIndex(idx);
            String indexName = (index.getName() == null ? "" : index.getName());

            if (indexName.length() > 0) {
                if (namesOfProcessedIndices.contains(indexName)) {
                    throw new ModelException("There are multiple indices in table " + curTable.getName()
                            + " with the name " + indexName);
                }
                namesOfProcessedIndices.add(indexName);
            }
            for (int indexColumnIdx = 0; indexColumnIdx < index.getColumnCount(); indexColumnIdx++) {
                IndexColumn indexColumn = index.getColumn(indexColumnIdx);
                Column column = curTable.findColumn(indexColumn.getName(), true);
                indexColumn.setColumn(column);
            }
        }
    }
}
From source file:org.alfresco.solr.tracker.MetadataTracker.java
private void indexTransactionsAfterAsynchronous(HashSet<Transaction> txsIndexed, TrackerState state)
        throws IOException {
    waitForAsynchronous();
    for (Transaction tx : txsIndexed) {
        super.infoSrv.indexTransaction(tx, true);
        // Transactions are ordered by commit time and tie-broken by tx id
        if (tx.getCommitTimeMs() > state.getLastIndexedTxCommitTime()
                || tx.getCommitTimeMs() == state.getLastIndexedTxCommitTime()
                        && tx.getId() > state.getLastIndexedTxId()) {
            state.setLastIndexedTxCommitTime(tx.getCommitTimeMs());
            state.setLastIndexedTxId(tx.getId());
        }
        trackerStats.addTxDocs((int) (tx.getDeletes() + tx.getUpdates()));
    }
    txsIndexed.clear();
    super.infoSrv.commit();
}
From source file:org.apache.ddlutils.model.Database.java
/**
 * Initializes the model by establishing the relationships between elements in this model encoded
 * e.g. in foreign keys etc. Also checks that the model elements are valid (table and columns have
 * a name, foreign keys reference existing tables etc.)
 */
public void initialize() throws ModelException {
    // we have to setup
    // * target tables in foreign keys
    // * columns in foreign key references
    // * columns in indices
    // * columns in uniques
    HashSet namesOfProcessedTables = new HashSet();
    HashSet namesOfProcessedColumns = new HashSet();
    HashSet namesOfProcessedFks = new HashSet();
    HashSet namesOfProcessedIndices = new HashSet();
    int tableIdx = 0;

    if ((getName() == null) || (getName().length() == 0)) {
        throw new ModelException("The database model has no name");
    }

    for (Iterator tableIt = _tables.iterator(); tableIt.hasNext(); tableIdx++) {
        Table curTable = (Table) tableIt.next();

        if ((curTable.getName() == null) || (curTable.getName().length() == 0)) {
            throw new ModelException("The table nr. " + tableIdx + " has no name");
        }
        if (namesOfProcessedTables.contains(curTable.getName())) {
            throw new ModelException("There are multiple tables with the name " + curTable.getName());
        }
        namesOfProcessedTables.add(curTable.getName());
        // reuse the per-table scratch sets for the next table
        namesOfProcessedColumns.clear();
        namesOfProcessedFks.clear();
        namesOfProcessedIndices.clear();

        for (int idx = 0; idx < curTable.getColumnCount(); idx++) {
            Column column = curTable.getColumn(idx);

            if ((column.getName() == null) || (column.getName().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no name");
            }
            if (namesOfProcessedColumns.contains(column.getName())) {
                throw new ModelException("There are multiple columns with the name " + column.getName()
                        + " in the table " + curTable.getName());
            }
            namesOfProcessedColumns.add(column.getName());

            if ((column.getType() == null) || (column.getType().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no type");
            }
            if ((column.getTypeCode() == Types.OTHER) && !"OTHER".equalsIgnoreCase(column.getType())) {
                throw new ModelException("The column nr. " + idx + " in table " + curTable.getName()
                        + " has an unknown type " + column.getType());
            }
            namesOfProcessedColumns.add(column.getName());
        }

        for (int idx = 0; idx < curTable.getForeignKeyCount(); idx++) {
            ForeignKey fk = curTable.getForeignKey(idx);
            String fkName = (fk.getName() == null ? "" : fk.getName());
            String fkDesc = (fkName.length() == 0 ? "nr. " + idx : fkName);

            if (fkName.length() > 0) {
                if (namesOfProcessedFks.contains(fkName)) {
                    throw new ModelException("There are multiple foreign keys in table "
                            + curTable.getName() + " with the name " + fkName);
                }
                namesOfProcessedFks.add(fkName);
            }

            if (fk.getForeignTable() == null) {
                Table targetTable = findTable(fk.getForeignTableName(), true);

                if (targetTable == null) {
                    final String msg = String.format(
                            "The foreignkey [%s] in table [%s] references the undefined table [%s]. Will be ignored!",
                            fkDesc, curTable.getName(), fk.getForeignTableName());
                    _log.debug(msg);
                    continue;
                    //throw new ModelException(msg);
                } else {
                    fk.setForeignTable(targetTable);
                }
            }
            if (fk.getReferenceCount() == 0) {
                throw new ModelException("The foreignkey " + fkDesc + " in table " + curTable.getName()
                        + " does not have any references");
            }
            for (int refIdx = 0; refIdx < fk.getReferenceCount(); refIdx++) {
                Reference ref = fk.getReference(refIdx);

                if (ref.getLocalColumn() == null) {
                    Column localColumn = curTable.findColumn(ref.getLocalColumnName(), true);

                    if (localColumn == null) {
                        throw new ModelException("The foreignkey " + fkDesc + " in table "
                                + curTable.getName() + " references the undefined local column "
                                + ref.getLocalColumnName());
                    } else {
                        ref.setLocalColumn(localColumn);
                    }
                }
                if (ref.getForeignColumn() == null) {
                    Column foreignColumn = fk.getForeignTable().findColumn(ref.getForeignColumnName(), true);

                    if (foreignColumn == null) {
                        throw new ModelException("The foreignkey " + fkDesc + " in table "
                                + curTable.getName() + " references the undefined local column "
                                + ref.getForeignColumnName() + " in table "
                                + fk.getForeignTable().getName());
                    } else {
                        ref.setForeignColumn(foreignColumn);
                    }
                }
            }
        }

        for (int idx = 0; idx < curTable.getIndexCount(); idx++) {
            Index index = curTable.getIndex(idx);
            String indexName = (index.getName() == null ? "" : index.getName());
            String indexDesc = (indexName.length() == 0 ? "nr. " + idx : indexName);

            if (indexName.length() > 0) {
                if (namesOfProcessedIndices.contains(indexName)) {
                    throw new ModelException("There are multiple indices in table " + curTable.getName()
                            + " with the name " + indexName);
                }
                namesOfProcessedIndices.add(indexName);
            }
            if (index.getColumnCount() == 0) {
                throw new ModelException("The index " + indexDesc + " in table " + curTable.getName()
                        + " does not have any columns");
            }
            for (int indexColumnIdx = 0; indexColumnIdx < index.getColumnCount(); indexColumnIdx++) {
                IndexColumn indexColumn = index.getColumn(indexColumnIdx);
                Column column = curTable.findColumn(indexColumn.getName(), true);

                if (column == null) {
                    throw new ModelException("The index " + indexDesc + " in table " + curTable.getName()
                            + " references the undefined column " + indexColumn.getName());
                } else {
                    indexColumn.setColumn(column);
                }
            }
        }
    }
}
From source file:org.apache.ddlutils.io.TestMisc.java
/**
 * Tests the backup and restore of several tables with complex relationships with an identity
 * column and a foreign key to itself while identity override is off.
 */
public void testComplexTableModel() throws Exception {
    // A: self-reference (A1->A2)
    // B: self- and foreign-reference (B1->B2|G1, B2->G2)
    // C: circular reference involving more than one table (C1->D1,C2->D2)
    // D: foreign-reference to F (D1->F1,D2)
    // E: isolated table (E1)
    // F: foreign-reference to C (F1->C2)
    // G: no references (G1, G2)
    final String modelXml = "<?xml version='1.0' encoding='ISO-8859-1'?>\n"
            + "<database xmlns='" + DatabaseIO.DDLUTILS_NAMESPACE + "' name='roundtriptest'>\n"
            + " <table name='A'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " <column name='fk' type='INTEGER' required='false'/>\n"
            + " <foreign-key name='AtoA' foreignTable='A'>\n"
            + " <reference local='fk' foreign='pk'/>\n"
            + " </foreign-key>\n"
            + " </table>\n"
            + " <table name='B'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " <column name='fk1' type='INTEGER' required='false'/>\n"
            + " <column name='fk2' type='INTEGER' required='false'/>\n"
            + " <foreign-key name='BtoB' foreignTable='B'>\n"
            + " <reference local='fk1' foreign='pk'/>\n"
            + " </foreign-key>\n"
            + " <foreign-key name='BtoG' foreignTable='G'>\n"
            + " <reference local='fk2' foreign='pk'/>\n"
            + " </foreign-key>\n"
            + " </table>\n"
            + " <table name='C'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " <column name='fk' type='INTEGER' required='false'/>\n"
            + " <foreign-key name='CtoD' foreignTable='D'>\n"
            + " <reference local='fk' foreign='pk'/>\n"
            + " </foreign-key>\n"
            + " </table>\n"
            + " <table name='D'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " <column name='fk' type='INTEGER' required='false'/>\n"
            + " <foreign-key name='DtoF' foreignTable='F'>\n"
            + " <reference local='fk' foreign='pk'/>\n"
            + " </foreign-key>\n"
            + " </table>\n"
            + " <table name='E'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " </table>\n"
            + " <table name='F'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " <column name='fk' type='INTEGER' required='false'/>\n"
            + " <foreign-key name='FtoC' foreignTable='C'>\n"
            + " <reference local='fk' foreign='pk'/>\n"
            + " </foreign-key>\n"
            + " </table>\n"
            + " <table name='G'>\n"
            + " <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + " </table>\n"
            + "</database>";

    createDatabase(modelXml);
    getPlatform().setIdentityOverrideOn(true);

    // this is the optimal insertion order
    insertRow("E", new Object[] { new Integer(1) });
    insertRow("G", new Object[] { new Integer(1) });
    insertRow("G", new Object[] { new Integer(2) });
    insertRow("A", new Object[] { new Integer(2), null });
    insertRow("A", new Object[] { new Integer(1), new Integer(2) });
    insertRow("B", new Object[] { new Integer(2), null, new Integer(2) });
    insertRow("B", new Object[] { new Integer(1), new Integer(2), new Integer(1) });
    insertRow("D", new Object[] { new Integer(2), null });
    insertRow("C", new Object[] { new Integer(2), new Integer(2) });
    insertRow("F", new Object[] { new Integer(1), new Integer(2) });
    insertRow("D", new Object[] { new Integer(1), new Integer(1) });
    insertRow("C", new Object[] { new Integer(1), new Integer(1) });

    StringWriter stringWriter = new StringWriter();
    DatabaseDataIO dataIO = new DatabaseDataIO();

    dataIO.writeDataToXML(getPlatform(), getModel(), stringWriter, "UTF-8");

    String dataAsXml = stringWriter.toString();

    // the somewhat optimized order that DdlUtils currently generates is:
    // E1, G1, G2, A2, A1, B2, B1, C2, C1, D2, D1, F1
    // note that the order per table is the insertion order above
    SAXReader reader = new SAXReader();
    Document testDoc = reader.read(new InputSource(new StringReader(dataAsXml)));

    boolean uppercase = false;
    List rows = testDoc.selectNodes("/*/*");
    String pkColumnName = "pk";

    assertEquals(12, rows.size());
    if (!"e".equals(((Element) rows.get(0)).getName())) {
        assertEquals("E", ((Element) rows.get(0)).getName());
        uppercase = true;
    }
    if (!"pk".equals(((Element) rows.get(0)).attribute(0).getName())) {
        pkColumnName = pkColumnName.toUpperCase();
    }
    assertEquals("1", ((Element) rows.get(0)).attributeValue(pkColumnName));

    // we cannot be sure of the order in which the database returns the rows
    // per table (some return them in pk order, some in insertion order)
    // so we don't assume an order in this test
    HashSet pkValues = new HashSet();
    HashSet expectedValues = new HashSet(Arrays.asList(new String[] { "1", "2" }));

    assertEquals(uppercase ? "G" : "g", ((Element) rows.get(1)).getName());
    assertEquals(uppercase ? "G" : "g", ((Element) rows.get(2)).getName());
    pkValues.add(((Element) rows.get(1)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(2)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();
    assertEquals(uppercase ? "A" : "a", ((Element) rows.get(3)).getName());
    assertEquals(uppercase ? "A" : "a", ((Element) rows.get(4)).getName());
    pkValues.add(((Element) rows.get(3)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(4)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();
    assertEquals(uppercase ? "B" : "b", ((Element) rows.get(5)).getName());
    assertEquals(uppercase ? "B" : "b", ((Element) rows.get(6)).getName());
    pkValues.add(((Element) rows.get(5)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(6)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();
    assertEquals(uppercase ? "C" : "c", ((Element) rows.get(7)).getName());
    assertEquals(uppercase ? "C" : "c", ((Element) rows.get(8)).getName());
    pkValues.add(((Element) rows.get(7)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(8)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();
    assertEquals(uppercase ? "D" : "d", ((Element) rows.get(9)).getName());
    assertEquals(uppercase ? "D" : "d", ((Element) rows.get(10)).getName());
    pkValues.add(((Element) rows.get(9)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(10)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();
    assertEquals(uppercase ? "F" : "f", ((Element) rows.get(11)).getName());
    assertEquals("1", ((Element) rows.get(11)).attributeValue(pkColumnName));

    dropDatabase();
    createDatabase(modelXml);

    StringReader stringReader = new StringReader(dataAsXml);

    dataIO.writeDataToDatabase(getPlatform(), getModel(), new Reader[] { stringReader });

    assertEquals(2, getRows("A").size());
    assertEquals(2, getRows("B").size());
    assertEquals(2, getRows("C").size());
    assertEquals(2, getRows("D").size());
    assertEquals(1, getRows("E").size());
    assertEquals(1, getRows("F").size());
    assertEquals(2, getRows("G").size());
}
From source file:org.biopax.ols.impl.BaseOBO2AbstractLoader.java
/**
 * internal helper method to create TermPathBeans for a given term. This method will
 * precompute all paths from a parent to all its children for the 3 major relationship types:
 * IS_A, PART_OF and DEVELOPS_FROM. The PART_OF and DEVELOPS_FROM relations can traverse IS_A
 * relations for maximal completeness and still be semantically correct, but IS_A relationships
 * cannot traverse other relation types.
 * <pre>
 * term1
 *  |_ child1        child1 IS_A term1
 *  |_ child2        child2 IS_A term1
 *                   subject pred object
 * </pre>
 *
 * @param obj - the OBOEdit term object to extract information from
 * @param trm - the OLS parent term to link to
 * @return a Collection of valid TermRelationshipBeans
 */
private Collection<TermPath> processPaths(OBOObject obj, TermBean trm) {
    HashSet<TermPath> retval = new HashSet<TermPath>();

    HashMap<String, Integer> paths = parser.computeChildPaths(1, IS_A_SET, obj);
    retval.addAll(createTermPathBeans(paths, Constants.IS_A_RELATION_TYPE_ID, IS_A, trm));

    // the part_of relation can traverse is_a relations to generate term_paths
    // so the set passed to computeChildPaths needs to contain both PART_OF and IS_A labels.
    HashSet<String> traversingSet = new HashSet<String>();
    traversingSet.addAll(PART_OF_SET);
    traversingSet.addAll(IS_A_SET);
    paths = parser.computeChildPaths(1, traversingSet, obj);
    retval.addAll(createTermPathBeans(paths, Constants.PART_OF_RELATION_TYPE_ID, PART_OF, trm));

    // the dev_from relation can traverse is_a relations to generate term_paths
    // so the set passed to computeChildPaths needs to contain both DEV_FROM and IS_A labels.
    traversingSet.clear();
    traversingSet.addAll(DEV_FROM_SET);
    traversingSet.addAll(IS_A_SET);
    paths = parser.computeChildPaths(1, traversingSet, obj);
    retval.addAll(createTermPathBeans(paths, Constants.DEVELOPS_FROM_RELATION_TYPE_ID, DEVELOPS_FROM, trm));

    return retval;
}
From source file:org.eclipse.rdf4j.sail.solr.SolrIndexTest.java
@Test
public void testAddMultiple() throws Exception {
    // add a statement to an index
    HashSet<Statement> added = new HashSet<Statement>();
    HashSet<Statement> removed = new HashSet<Statement>();
    added.add(statement11);
    added.add(statement12);
    added.add(statement21);
    added.add(statement22);
    index.begin();
    index.addRemoveStatements(added, removed);
    index.commit();

    // check that it arrived properly
    long count = client.query(new SolrQuery("*:*").setRows(0)).getResults().getNumFound();
    assertEquals(2, count);

    // check the documents
    SearchDocument document = index.getDocuments(subject).iterator().next();
    assertEquals(subject.toString(), document.getResource());
    assertStatement(statement11, document);
    assertStatement(statement12, document);

    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.getResource());
    assertStatement(statement21, document);
    assertStatement(statement22, document);

    // check if the text field stores all added string values
    Set<String> texts = new HashSet<String>();
    texts.add("cats");
    texts.add("dogs");
    // FIXME
    // assertTexts(texts, document);

    // add/remove one
    added.clear();
    removed.clear();
    added.add(statement23);
    removed.add(statement22);
    index.begin();
    index.addRemoveStatements(added, removed);
    index.commit();

    // check doc 2
    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.getResource());
    assertStatement(statement21, document);
    assertStatement(statement23, document);
    assertNoStatement(statement22, document);

    // check if the text field stores all added and no deleted string values
    texts.remove("dogs");
    texts.add("chicken");
    // FIXME
    // assertTexts(texts, document);

    // TODO: check deletion of the rest
}
From source file:pltag.parser.Lexicon.java
public void getFamily(String string) {
    HashMap<String, Integer> similars = new HashMap<String, Integer>();
    int most = 0;
    HashSet<String> mostSimilar = new HashSet<String>();
    for (String tree : (Collection<String>) lexEntriesTree.getCollection(string)) {
        // skip rare trees
        if (!this.noOfTrees.containsKey(tree) || noOfTrees.get(tree) < 5) {
            continue;
        }
        String t = tree.substring(tree.indexOf("\t") + 1);
        for (String assoc : trees.getCollection(t)) {
            if (assoc.contains(" unk") || assoc.contains(string)) {
                continue;
            }
            if (similars.containsKey(assoc)) {
                int newNum = similars.get(assoc) + 1;
                similars.put(assoc, newNum);
                if (newNum > most) {
                    most = newNum;
                    // new best score: restart the candidate set with this word
                    mostSimilar.clear();
                    mostSimilar.add(assoc);
                }
                if (newNum == most) {
                    mostSimilar.add(assoc);
                }
            } else {
                similars.put(assoc, 1);
            }
        }
    }
    // collect the trees of the most similar words
    HashSet<String> simtrees = new HashSet<String>();
    for (String mostSimWords : mostSimilar) {
        simtrees.addAll((Collection<String>) lexEntriesTree.getCollection(mostSimWords));
    }
    System.out.println(mostSimilar + "\t");
    System.out.print(simtrees.toString() + "\n");
}