Example usage for java.util HashSet clear

List of usage examples for java.util HashSet clear

Introduction

On this page you can find example usages of java.util.HashSet.clear().

Prototype

public void clear() 

Document

Removes all of the elements from this set.
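
Before the real-world usages below, here is a minimal, self-contained sketch of the method's behavior (the class and variable names are illustrative, not taken from any of the source files):

import java.util.HashSet;

public class HashSetClearDemo {
    public static void main(String[] args) {
        HashSet<String> names = new HashSet<>();
        names.add("alice");
        names.add("bob");
        System.out.println(names.size());    // prints 2

        names.clear();                       // removes every element; the set itself stays usable
        System.out.println(names.isEmpty()); // prints true

        names.add("carol");                  // the same instance can be refilled
        System.out.println(names.size());    // prints 1
    }
}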

Usage

From source file:org.apache.hadoop.mapred.split.TezMapredSplitsGrouper.java

public InputSplit[] getGroupedSplits(Configuration conf, InputSplit[] originalSplits, int desiredNumSplits,
        String wrappedInputFormatName) throws IOException {
    LOG.info("Grouping splits in Tez");

    int configNumSplits = conf.getInt(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_COUNT, 0);
    if (configNumSplits > 0) {
        // always use config override if specified
        desiredNumSplits = configNumSplits;
        LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
    }

    if (!(configNumSplits > 0 || originalSplits == null || originalSplits.length == 0)) {
        // numSplits has not been overridden by config
        // numSplits has been set at runtime
        // there are splits generated
        // Do sanity checks
        long totalLength = 0;
        for (InputSplit split : originalSplits) {
            totalLength += split.getLength();
        }

        int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.length;
        long lengthPerGroup = totalLength / splitCount;

        long maxLengthPerGroup = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
        long minLengthPerGroup = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE,
                TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
        if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
            throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                    + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
        }
        if (lengthPerGroup > maxLengthPerGroup) {
            // splits too big to work. Need to override with max size.
            int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too small. " + " Desired splitLength: "
                    + lengthPerGroup + " Max splitLength: " + maxLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                    + originalSplits.length);

            desiredNumSplits = newDesiredNumSplits;
        } else if (lengthPerGroup < minLengthPerGroup) {
            // splits too small to work. Need to override with min size.
            int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too large. " + " Desired splitLength: "
                    + lengthPerGroup + " Min splitLength: " + minLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                    + originalSplits.length);

            desiredNumSplits = newDesiredNumSplits;
        }
    }

    if (originalSplits == null) {
        LOG.info("Null original splits");
        return null;
    }

    if (desiredNumSplits == 0 || originalSplits.length == 0 || desiredNumSplits >= originalSplits.length) {
        // nothing set. so return all the splits as is
        LOG.info("Using original number of splits: " + originalSplits.length + " desired splits: "
                + desiredNumSplits);
        InputSplit[] groupedSplits = new TezGroupedSplit[originalSplits.length];
        int i = 0;
        for (InputSplit split : originalSplits) {
            TezGroupedSplit newSplit = new TezGroupedSplit(1, wrappedInputFormatName, split.getLocations());
            newSplit.addSplit(split);
            groupedSplits[i++] = newSplit;
        }
        return groupedSplits;
    }

    String emptyLocation = "EmptyLocation";
    String[] emptyLocations = { emptyLocation };
    List<InputSplit> groupedSplitsList = new ArrayList<InputSplit>(desiredNumSplits);

    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
    // go through splits and add them to locations
    for (InputSplit split : originalSplits) {
        totalLength += split.getLength();
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            distinctLocations.put(location, null);
        }
    }

    long lengthPerGroup = totalLength / desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.length / numNodeLocations;
    int numSplitsInGroup = originalSplits.length / desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
        distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
    }

    Set<String> locSet = new HashSet<String>();
    for (InputSplit split : originalSplits) {
        locSet.clear();
        SplitHolder splitHolder = new SplitHolder(split);
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            locSet.add(location);
        }
        for (String location : locSet) {
            LocationHolder holder = distinctLocations.get(location);
            holder.splits.add(splitHolder);
        }
    }

    boolean groupByLength = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH,
            TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT,
            TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    if (!(groupByLength || groupByCount)) {
        throw new TezUncheckedException("None of the grouping parameters are true: "
                + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH + ", "
                + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits + " lengthPerGroup: " + lengthPerGroup + " numLocations: "
            + numNodeLocations + " numSplitsPerLocation: " + numSplitsPerLocation + " numSplitsInGroup: "
            + numSplitsInGroup + " totalLength: " + totalLength + " numOriginalSplits: " + originalSplits.length
            + " . Grouping by length: " + groupByLength + " count: " + groupByCount);

    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitHolder> group = new ArrayList<SplitHolder>(numSplitsInGroup + 1);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    while (splitsProcessed < originalSplits.length) {
        iterations++;
        int numFullGroupsCreated = 0;
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
            group.clear();
            groupLocationSet.clear();
            String location = entry.getKey();
            LocationHolder holder = entry.getValue();
            SplitHolder splitHolder = holder.getUnprocessedHeadSplit();
            if (splitHolder == null) {
                // all splits on node processed
                continue;
            }
            int oldHeadIndex = holder.headIndex;
            long groupLength = 0;
            int groupNumSplits = 0;
            do {
                group.add(splitHolder);
                groupLength += splitHolder.split.getLength();
                groupNumSplits++;
                holder.incrementHeadIndex();
                splitHolder = holder.getUnprocessedHeadSplit();
            } while (splitHolder != null
                    && (!groupByLength || (groupLength + splitHolder.split.getLength() <= lengthPerGroup))
                    && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

            if (holder.isEmpty() && !allowSmallGroups && (!groupByLength || groupLength < lengthPerGroup / 2)
                    && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                // group too small, reset it
                holder.headIndex = oldHeadIndex;
                continue;
            }

            numFullGroupsCreated++;

            // One split group created
            String[] groupLocation = { location };
            if (location == emptyLocation) {
                groupLocation = null;
            } else if (doingRackLocal) {
                for (SplitHolder splitH : group) {
                    String[] locations = splitH.split.getLocations();
                    if (locations != null) {
                        for (String loc : locations) {
                            if (loc != null) {
                                groupLocationSet.add(loc);
                            }
                        }
                    }
                }
                groupLocation = groupLocationSet.toArray(groupLocation);
            }
            TezGroupedSplit groupedSplit = new TezGroupedSplit(group.size(), wrappedInputFormatName,
                    groupLocation,
                    // pass rack local hint directly to AM
                    ((doingRackLocal && location != emptyLocation) ? location : null));
            for (SplitHolder groupedSplitHolder : group) {
                groupedSplit.addSplit(groupedSplitHolder.split);
                Preconditions.checkState(groupedSplitHolder.isProcessed == false,
                        "Duplicates in grouping at location: " + location);
                groupedSplitHolder.isProcessed = true;
                splitsProcessed++;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength() + " split at: "
                        + location);
            }
            groupedSplitsList.add(groupedSplit);
        }

        if (!doingRackLocal && numFullGroupsCreated < 1) {
            // no node could create a node-local group. go rack-local
            doingRackLocal = true;
            // re-create locations
            int numRemainingSplits = originalSplits.length - splitsProcessed;
            Set<InputSplit> remainingSplits = new HashSet<InputSplit>(numRemainingSplits);
            // gather remaining splits.
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                LocationHolder locHolder = entry.getValue();
                while (!locHolder.isEmpty()) {
                    SplitHolder splitHolder = locHolder.getUnprocessedHeadSplit();
                    if (splitHolder != null) {
                        remainingSplits.add(splitHolder.split);
                        locHolder.incrementHeadIndex();
                    }
                }
            }
            if (remainingSplits.size() != numRemainingSplits) {
                throw new TezUncheckedException(
                        "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
            }

            // doing all this now instead of up front because the number of remaining
            // splits is expected to be much smaller
            RackResolver.init(conf);
            Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
            Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
            for (String location : distinctLocations.keySet()) {
                String rack = emptyLocation;
                if (location != emptyLocation) {
                    rack = RackResolver.resolve(location).getNetworkLocation();
                }
                locToRackMap.put(location, rack);
                if (rackLocations.get(rack) == null) {
                    // splits will probably be located in all racks
                    rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                }
            }
            distinctLocations.clear();
            HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
            int numRackSplitsToGroup = remainingSplits.size();
            for (InputSplit split : originalSplits) {
                if (numRackSplitsToGroup == 0) {
                    break;
                }
                // Iterate through the original splits in their order and consider them for grouping. 
                // This maintains the original ordering in the list and thus subsequent grouping will 
                // maintain that order
                if (!remainingSplits.contains(split)) {
                    continue;
                }
                numRackSplitsToGroup--;
                rackSet.clear();
                SplitHolder splitHolder = new SplitHolder(split);
                String[] locations = split.getLocations();
                if (locations == null || locations.length == 0) {
                    locations = emptyLocations;
                }
                for (String location : locations) {
                    if (location == null) {
                        location = emptyLocation;
                    }
                    rackSet.add(locToRackMap.get(location));
                }
                for (String rack : rackSet) {
                    rackLocations.get(rack).splits.add(splitHolder);
                }
            }
            remainingSplits.clear();
            distinctLocations = rackLocations;
            // adjust split length to be smaller because the data is non local
            float rackSplitReduction = conf.getFloat(
                    TezMapReduceSplitsGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                    TezMapReduceSplitsGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
            if (rackSplitReduction > 0) {
                long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                if (newLengthPerGroup > 0) {
                    lengthPerGroup = newLengthPerGroup;
                }
                if (newNumSplitsInGroup > 0) {
                    numSplitsInGroup = newNumSplitsInGroup;
                }
            }

            LOG.info("Doing rack local after iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplitsList.size() + " lengthPerGroup: " + lengthPerGroup + " numSplitsInGroup: "
                    + numSplitsInGroup);

            // don't do smallGroups for the first pass
            continue;
        }

        if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
            // a few nodes have a lot of data or data is thinly spread across nodes
            // so allow small groups now        
            allowSmallGroups = true;
            LOG.info("Allowing small groups after iteration: " + iterations + " splitsProcessed: "
                    + splitsProcessed + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplitsList.size());
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplitsList.size());
        }
    }
    InputSplit[] groupedSplits = new InputSplit[groupedSplitsList.size()];
    groupedSplitsList.toArray(groupedSplits);
    LOG.info("Number of splits desired: " + desiredNumSplits + " created: " + groupedSplitsList.size()
            + " splitsProcessed: " + splitsProcessed);
    return groupedSplits;
}
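
Note the clear()-and-reuse idiom in the example above: locSet is allocated once outside the loop and reset with clear() for every split, avoiding one HashSet allocation per iteration. A standalone sketch of that pattern (the record/tag names are invented for illustration):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class ReuseSetDemo {
    // Deduplicate each record's tags without allocating a new set per record.
    static void process(List<String[]> records) {
        Set<String> seen = new HashSet<>(); // allocated once, outside the loop
        for (String[] tags : records) {
            seen.clear(); // discard state left over from the previous record
            for (String tag : tags) {
                if (seen.add(tag)) { // add() returns true only for the first occurrence
                    System.out.println("first occurrence: " + tag);
                }
            }
        }
    }
}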

From source file:org.apache.hadoop.mapreduce.split.TezMapReduceSplitsGrouper.java

public List<InputSplit> getGroupedSplits(Configuration conf, List<InputSplit> originalSplits,
        int desiredNumSplits, String wrappedInputFormatName) throws IOException, InterruptedException {
    LOG.info("Grouping splits in Tez");

    int configNumSplits = conf.getInt(TEZ_GROUPING_SPLIT_COUNT, 0);
    if (configNumSplits > 0) {
        // always use config override if specified
        desiredNumSplits = configNumSplits;
        LOG.info("Desired numSplits overridden by config to: " + desiredNumSplits);
    }

    if (!(configNumSplits > 0 || originalSplits == null || originalSplits.size() == 0)) {
        // numSplits has not been overridden by config
        // numSplits has been set at runtime
        // there are splits generated
        // desired splits is less than number of splits generated
        // Do sanity checks
        long totalLength = 0;
        for (InputSplit split : originalSplits) {
            totalLength += split.getLength();
        }

        int splitCount = desiredNumSplits > 0 ? desiredNumSplits : originalSplits.size();
        long lengthPerGroup = totalLength / splitCount;

        long maxLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MAX_SIZE, TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT);
        long minLengthPerGroup = conf.getLong(TEZ_GROUPING_SPLIT_MIN_SIZE, TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
        if (maxLengthPerGroup < minLengthPerGroup || minLengthPerGroup <= 0) {
            throw new TezUncheckedException("Invalid max/min group lengths. Required min>0, max>=min. "
                    + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
        }
        if (lengthPerGroup > maxLengthPerGroup) {
            // splits too big to work. Need to override with max size.
            int newDesiredNumSplits = (int) (totalLength / maxLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too small. " + " Desired splitLength: "
                    + lengthPerGroup + " Max splitLength: " + maxLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                    + originalSplits.size());

            desiredNumSplits = newDesiredNumSplits;
        } else if (lengthPerGroup < minLengthPerGroup) {
            // splits too small to work. Need to override with min size.
            int newDesiredNumSplits = (int) (totalLength / minLengthPerGroup) + 1;
            LOG.info("Desired splits: " + desiredNumSplits + " too large. " + " Desired splitLength: "
                    + lengthPerGroup + " Min splitLength: " + minLengthPerGroup + " New desired splits: "
                    + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: "
                    + originalSplits.size());

            desiredNumSplits = newDesiredNumSplits;
        }
    }

    List<InputSplit> groupedSplits = null;

    if (desiredNumSplits == 0 || originalSplits.size() == 0 || desiredNumSplits >= originalSplits.size()) {
        // nothing set. so return all the splits as is
        LOG.info("Using original number of splits: " + originalSplits.size() + " desired splits: "
                + desiredNumSplits);
        groupedSplits = new ArrayList<InputSplit>(originalSplits.size());
        for (InputSplit split : originalSplits) {
            TezGroupedSplit newSplit = new TezGroupedSplit(1, wrappedInputFormatName, split.getLocations());
            newSplit.addSplit(split);
            groupedSplits.add(newSplit);
        }
        return groupedSplits;
    }

    String emptyLocation = "EmptyLocation";
    String[] emptyLocations = { emptyLocation };
    groupedSplits = new ArrayList<InputSplit>(desiredNumSplits);

    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
    // go through splits and add them to locations
    for (InputSplit split : originalSplits) {
        totalLength += split.getLength();
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            distinctLocations.put(location, null);
        }
    }

    long lengthPerGroup = totalLength / desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.size() / numNodeLocations;
    int numSplitsInGroup = originalSplits.size() / desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
        distinctLocations.put(location, new LocationHolder(numSplitsPerLocation + 1));
    }

    Set<String> locSet = new HashSet<String>();
    for (InputSplit split : originalSplits) {
        locSet.clear();
        SplitHolder splitHolder = new SplitHolder(split);
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            locations = emptyLocations;
        }
        for (String location : locations) {
            if (location == null) {
                location = emptyLocation;
            }
            locSet.add(location);
        }
        for (String location : locSet) {
            LocationHolder holder = distinctLocations.get(location);
            holder.splits.add(splitHolder);
        }
    }

    boolean groupByLength = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_LENGTH, TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(TEZ_GROUPING_SPLIT_BY_COUNT, TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    if (!(groupByLength || groupByCount)) {
        throw new TezUncheckedException("None of the grouping parameters are true: "
                + TEZ_GROUPING_SPLIT_BY_LENGTH + ", " + TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits + " lengthPerGroup: " + lengthPerGroup + " numLocations: "
            + numNodeLocations + " numSplitsPerLocation: " + numSplitsPerLocation + " numSplitsInGroup: "
            + numSplitsInGroup + " totalLength: " + totalLength + " numOriginalSplits: " + originalSplits.size()
            + " . Grouping by length: " + groupByLength + " count: " + groupByCount);

    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitHolder> group = new ArrayList<SplitHolder>(numSplitsInGroup);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    while (splitsProcessed < originalSplits.size()) {
        iterations++;
        int numFullGroupsCreated = 0;
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
            group.clear();
            groupLocationSet.clear();
            String location = entry.getKey();
            LocationHolder holder = entry.getValue();
            SplitHolder splitHolder = holder.getUnprocessedHeadSplit();
            if (splitHolder == null) {
                // all splits on node processed
                continue;
            }
            int oldHeadIndex = holder.headIndex;
            long groupLength = 0;
            int groupNumSplits = 0;
            do {
                group.add(splitHolder);
                groupLength += splitHolder.split.getLength();
                groupNumSplits++;
                holder.incrementHeadIndex();
                splitHolder = holder.getUnprocessedHeadSplit();
            } while (splitHolder != null
                    && (!groupByLength || (groupLength + splitHolder.split.getLength() <= lengthPerGroup))
                    && (!groupByCount || (groupNumSplits + 1 <= numSplitsInGroup)));

            if (holder.isEmpty() && !allowSmallGroups && (!groupByLength || groupLength < lengthPerGroup / 2)
                    && (!groupByCount || groupNumSplits < numSplitsInGroup / 2)) {
                // group too small, reset it
                holder.headIndex = oldHeadIndex;
                continue;
            }

            numFullGroupsCreated++;

            // One split group created
            String[] groupLocation = { location };
            if (location == emptyLocation) {
                groupLocation = null;
            } else if (doingRackLocal) {
                for (SplitHolder splitH : group) {
                    String[] locations = splitH.split.getLocations();
                    if (locations != null) {
                        for (String loc : locations) {
                            if (loc != null) {
                                groupLocationSet.add(loc);
                            }
                        }
                    }
                }
                groupLocation = groupLocationSet.toArray(groupLocation);
            }
            TezGroupedSplit groupedSplit = new TezGroupedSplit(group.size(), wrappedInputFormatName,
                    groupLocation,
                    // pass rack local hint directly to AM
                    ((doingRackLocal && location != emptyLocation) ? location : null));
            for (SplitHolder groupedSplitHolder : group) {
                groupedSplit.addSplit(groupedSplitHolder.split);
                Preconditions.checkState(groupedSplitHolder.isProcessed == false,
                        "Duplicates in grouping at location: " + location);
                groupedSplitHolder.isProcessed = true;
                splitsProcessed++;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Grouped " + group.size() + " length: " + groupedSplit.getLength() + " split at: "
                        + location);
            }
            groupedSplits.add(groupedSplit);
        }

        if (!doingRackLocal && numFullGroupsCreated < 1) {
            // no node could create a node-local group. go rack-local
            doingRackLocal = true;
            // re-create locations
            int numRemainingSplits = originalSplits.size() - splitsProcessed;
            Set<InputSplit> remainingSplits = new HashSet<InputSplit>(numRemainingSplits);
            // gather remaining splits.
            for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
                LocationHolder locHolder = entry.getValue();
                while (!locHolder.isEmpty()) {
                    SplitHolder splitHolder = locHolder.getUnprocessedHeadSplit();
                    if (splitHolder != null) {
                        remainingSplits.add(splitHolder.split);
                        locHolder.incrementHeadIndex();
                    }
                }
            }
            if (remainingSplits.size() != numRemainingSplits) {
                throw new TezUncheckedException(
                        "Expected: " + numRemainingSplits + " got: " + remainingSplits.size());
            }

            // doing all this now instead of up front because the number of remaining
            // splits is expected to be much smaller
            RackResolver.init(conf);
            Map<String, String> locToRackMap = new HashMap<String, String>(distinctLocations.size());
            Map<String, LocationHolder> rackLocations = createLocationsMap(conf);
            for (String location : distinctLocations.keySet()) {
                String rack = emptyLocation;
                if (location != emptyLocation) {
                    rack = RackResolver.resolve(location).getNetworkLocation();
                }
                locToRackMap.put(location, rack);
                if (rackLocations.get(rack) == null) {
                    // splits will probably be located in all racks
                    rackLocations.put(rack, new LocationHolder(numRemainingSplits));
                }
            }
            distinctLocations.clear();
            HashSet<String> rackSet = new HashSet<String>(rackLocations.size());
            int numRackSplitsToGroup = remainingSplits.size();
            for (InputSplit split : originalSplits) {
                if (numRackSplitsToGroup == 0) {
                    break;
                }
                // Iterate through the original splits in their order and consider them for grouping. 
                // This maintains the original ordering in the list and thus subsequent grouping will 
                // maintain that order
                if (!remainingSplits.contains(split)) {
                    continue;
                }
                numRackSplitsToGroup--;
                rackSet.clear();
                SplitHolder splitHolder = new SplitHolder(split);
                String[] locations = split.getLocations();
                if (locations == null || locations.length == 0) {
                    locations = emptyLocations;
                }
                for (String location : locations) {
                    if (location == null) {
                        location = emptyLocation;
                    }
                    rackSet.add(locToRackMap.get(location));
                }
                for (String rack : rackSet) {
                    rackLocations.get(rack).splits.add(splitHolder);
                }
            }

            remainingSplits.clear();
            distinctLocations = rackLocations;
            // adjust split length to be smaller because the data is non local
            float rackSplitReduction = conf.getFloat(TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION,
                    TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT);
            if (rackSplitReduction > 0) {
                long newLengthPerGroup = (long) (lengthPerGroup * rackSplitReduction);
                int newNumSplitsInGroup = (int) (numSplitsInGroup * rackSplitReduction);
                if (newLengthPerGroup > 0) {
                    lengthPerGroup = newLengthPerGroup;
                }
                if (newNumSplitsInGroup > 0) {
                    numSplitsInGroup = newNumSplitsInGroup;
                }
            }

            LOG.info("Doing rack local after iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: " + groupedSplits.size()
                    + " lengthPerGroup: " + lengthPerGroup + " numSplitsInGroup: " + numSplitsInGroup);

            // don't do smallGroups for the first pass
            continue;
        }

        if (!allowSmallGroups && numFullGroupsCreated <= numNodeLocations / 10) {
            // a few nodes have a lot of data or data is thinly spread across nodes
            // so allow small groups now        
            allowSmallGroups = true;
            LOG.info("Allowing small groups after iteration: " + iterations + " splitsProcessed: "
                    + splitsProcessed + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplits.size());
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Iteration: " + iterations + " splitsProcessed: " + splitsProcessed
                    + " numFullGroupsInRound: " + numFullGroupsCreated + " totalGroups: "
                    + groupedSplits.size());
        }
    }
    LOG.info("Number of splits desired: " + desiredNumSplits + " created: " + groupedSplits.size()
            + " splitsProcessed: " + splitsProcessed);
    return groupedSplits;
}

From source file:ai.grakn.test.graql.analytics.AnalyticsTest.java

@Test
public void testDegreeIsCorrectAssertionAboutAssertion()
        throws GraknValidationException, ExecutionException, InterruptedException {
    // TODO: Fix on TinkerGraphComputer
    assumeFalse(usingTinker());

    // create a simple graph
    RoleType pet = graph.putRoleType("pet");
    RoleType owner = graph.putRoleType("owner");
    RelationType mansBestFriend = graph.putRelationType("mans-best-friend").hasRole(pet).hasRole(owner);
    RoleType target = graph.putRoleType("target");
    RoleType value = graph.putRoleType("value");
    RelationType hasName = graph.putRelationType("has-name").hasRole(value).hasRole(target);
    EntityType person = graph.putEntityType("person").playsRole(owner);
    EntityType animal = graph.putEntityType("animal").playsRole(pet).playsRole(target);
    ResourceType<String> name = graph.putResourceType("name", ResourceType.DataType.STRING).playsRole(value);
    ResourceType<String> altName = graph.putResourceType("alternate-name", ResourceType.DataType.STRING)
            .playsRole(value);
    RoleType ownership = graph.putRoleType("ownership");
    RoleType ownershipResource = graph.putRoleType("ownership-resource");
    RelationType hasOwnershipResource = graph.putRelationType("has-ownership-resource").hasRole(ownership)
            .hasRole(ownershipResource);
    ResourceType<String> startDate = graph.putResourceType("start-date", ResourceType.DataType.STRING)
            .playsRole(ownershipResource);
    mansBestFriend.playsRole(ownership);

    // add data to the graph
    Entity coco = animal.addEntity();
    Entity dave = person.addEntity();
    Resource coconut = name.putResource("coconut");
    Resource stinky = altName.putResource("stinky");
    Relation daveOwnsCoco = mansBestFriend.addRelation().putRolePlayer(owner, dave).putRolePlayer(pet, coco);
    hasName.addRelation().putRolePlayer(target, coco).putRolePlayer(value, coconut);
    hasName.addRelation().putRolePlayer(target, coco).putRolePlayer(value, stinky);
    Resource sd = startDate.putResource("01/01/01");
    Relation ownsFrom = hasOwnershipResource.addRelation().putRolePlayer(ownershipResource, sd)
            .putRolePlayer(ownership, daveOwnsCoco);

    // manually compute the degree
    Map<String, Long> referenceDegrees1 = new HashMap<>();
    referenceDegrees1.put(coco.getId(), 1L);
    referenceDegrees1.put(dave.getId(), 1L);
    referenceDegrees1.put(daveOwnsCoco.getId(), 3L);
    referenceDegrees1.put(sd.getId(), 1L);
    referenceDegrees1.put(ownsFrom.getId(), 2L);

    // manually compute degrees
    Map<String, Long> referenceDegrees2 = new HashMap<>();
    referenceDegrees2.put(coco.getId(), 1L);
    referenceDegrees2.put(dave.getId(), 1L);
    referenceDegrees2.put(daveOwnsCoco.getId(), 2L);

    graph.commit();

    // create a subgraph with assertion on assertion
    HashSet<String> ct = Sets.newHashSet("animal", "person", "mans-best-friend", "start-date",
            "has-ownership-resource");
    Analytics computer = new Analytics(graph.getKeyspace(), ct, new HashSet<>());
    Map<Long, Set<String>> degrees = computer.degrees();
    assertTrue(!degrees.isEmpty());
    degrees.entrySet().forEach(entry -> entry.getValue().forEach(id -> {
        assertTrue(referenceDegrees1.containsKey(id));
        assertEquals(referenceDegrees1.get(id), entry.getKey());
    }));

    // create subgraph without assertion on assertion
    ct.clear();
    ct.add("animal");
    ct.add("person");
    ct.add("mans-best-friend");
    computer = new Analytics(graph.getKeyspace(), ct, new HashSet<>());
    degrees = computer.degrees();
    assertFalse(degrees.isEmpty());
    degrees.entrySet().forEach(entry -> entry.getValue().forEach(id -> {
        assertTrue(referenceDegrees2.containsKey(id));
        assertEquals(referenceDegrees2.get(id), entry.getKey());
    }));
}

From source file:org.jumpmind.db.model.Database.java

/**
 * Initializes the model by establishing the relationships between elements
 * in this model encoded e.g. in foreign keys etc. Also checks that the model
 * elements are valid (table and columns have a name, foreign keys reference
 * existing tables etc.)
 */
public void initialize() throws ModelException {
    // we have to setup
    // * target tables in foreign keys
    // * columns in foreign key references
    // * columns in indices
    // * columns in uniques
    HashSet<String> namesOfProcessedTables = new HashSet<String>();
    HashSet<String> namesOfProcessedColumns = new HashSet<String>();
    HashSet<String> namesOfProcessedFks = new HashSet<String>();
    HashSet<String> namesOfProcessedIndices = new HashSet<String>();
    int tableIdx = 0;

    for (Iterator<Table> tableIt = tables.iterator(); tableIt.hasNext(); tableIdx++) {
        Table curTable = tableIt.next();

        if ((curTable.getName() == null) || (curTable.getName().length() == 0)) {
            throw new ModelException("The table nr. " + tableIdx + " has no name");
        }
        if (namesOfProcessedTables.contains(curTable.getFullyQualifiedTableName())) {
            throw new ModelException("There are multiple tables with the name " + curTable.getName());
        }
        namesOfProcessedTables.add(curTable.getFullyQualifiedTableName());

        namesOfProcessedColumns.clear();
        namesOfProcessedFks.clear();
        namesOfProcessedIndices.clear();

        for (int idx = 0; idx < curTable.getColumnCount(); idx++) {
            Column column = curTable.getColumn(idx);

            if ((column.getName() == null) || (column.getName().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no name");
            }
            if (namesOfProcessedColumns.contains(column.getName())) {
                throw new ModelException("There are multiple column with the name " + column.getName()
                        + " in the table " + curTable.getName());
            }
            namesOfProcessedColumns.add(column.getName());

            if ((column.getMappedType() == null) || (column.getMappedType().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no type");
            }
            if ((column.getMappedTypeCode() == Types.OTHER)
                    && !"OTHER".equalsIgnoreCase(column.getMappedType())) {
                throw new ModelException("The column nr. " + idx + " in table " + curTable.getName()
                        + " has an unknown type " + column.getMappedType());
            }
            namesOfProcessedColumns.add(column.getName());
        }

        for (int idx = 0; idx < curTable.getForeignKeyCount(); idx++) {
            ForeignKey fk = curTable.getForeignKey(idx);
            String fkName = (fk.getName() == null ? "" : fk.getName());
            String fkDesc = (fkName.length() == 0 ? "nr. " + idx : fkName);

            if (fkName.length() > 0) {
                if (namesOfProcessedFks.contains(fkName)) {
                    throw new ModelException("There are multiple foreign keys in table " + curTable.getName()
                            + " with the name " + fkName);
                }
                namesOfProcessedFks.add(fkName);
            }

            if (fk.getForeignTable() == null) {
                Table targetTable = findTable(fk.getForeignTableName(), true);

                if (targetTable != null) {
                    fk.setForeignTable(targetTable);
                } else {
                    log.debug("The foreignkey " + fkDesc + " in table " + curTable.getName()
                            + " references the undefined table " + fk.getForeignTableName()
                            + ".  This could be because the foreign key table was in another schema which is a bug that should be fixed in the future.");
                }
            }
            if (fk.getForeignTable() != null) {
                for (int refIdx = 0; refIdx < fk.getReferenceCount(); refIdx++) {
                    Reference ref = fk.getReference(refIdx);

                    if (ref.getLocalColumn() == null) {
                        Column localColumn = curTable.findColumn(ref.getLocalColumnName(), true);

                        if (localColumn == null) {
                            throw new ModelException("The foreignkey " + fkDesc + " in table "
                                    + curTable.getName() + " references the undefined local column "
                                    + ref.getLocalColumnName());
                        } else {
                            ref.setLocalColumn(localColumn);
                        }
                    }
                    if (ref.getForeignColumn() == null) {
                        Column foreignColumn = fk.getForeignTable().findColumn(ref.getForeignColumnName(),
                                true);

                        if (foreignColumn == null) {
                            throw new ModelException("The foreignkey " + fkDesc + " in table "
                                    + curTable.getName() + " references the undefined local column "
                                    + ref.getForeignColumnName() + " in table "
                                    + fk.getForeignTable().getName());
                        } else {
                            ref.setForeignColumn(foreignColumn);
                        }
                    }
                }
            }
        }

        for (int idx = 0; idx < curTable.getIndexCount(); idx++) {
            IIndex index = curTable.getIndex(idx);
            String indexName = (index.getName() == null ? "" : index.getName());

            if (indexName.length() > 0) {
                if (namesOfProcessedIndices.contains(indexName)) {
                    throw new ModelException("There are multiple indices in table " + curTable.getName()
                            + " with the name " + indexName);
                }
                namesOfProcessedIndices.add(indexName);
            }

            for (int indexColumnIdx = 0; indexColumnIdx < index.getColumnCount(); indexColumnIdx++) {
                IndexColumn indexColumn = index.getColumn(indexColumnIdx);
                Column column = curTable.findColumn(indexColumn.getName(), true);
                indexColumn.setColumn(column);
            }
        }
    }
}

From source file:org.alfresco.solr.tracker.MetadataTracker.java

private void indexTransactionsAfterAsynchronous(HashSet<Transaction> txsIndexed, TrackerState state)
        throws IOException {
    waitForAsynchronous();
    for (Transaction tx : txsIndexed) {
        super.infoSrv.indexTransaction(tx, true);
        // Transactions are ordered by commit time and tie-broken by tx id
        if (tx.getCommitTimeMs() > state.getLastIndexedTxCommitTime()
                || tx.getCommitTimeMs() == state.getLastIndexedTxCommitTime()
                        && tx.getId() > state.getLastIndexedTxId()) {
            state.setLastIndexedTxCommitTime(tx.getCommitTimeMs());
            state.setLastIndexedTxId(tx.getId());
        }
        trackerStats.addTxDocs((int) (tx.getDeletes() + tx.getUpdates()));
    }
    txsIndexed.clear();
    super.infoSrv.commit();
}
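
Here the tracker consumes the entire set it was handed and then calls clear() so the caller can refill it for the next batch. A simplified sketch of this drain-then-clear pattern (the class and method names are invented for illustration):

import java.util.HashSet;
import java.util.Set;

public class PendingBatch {
    private final Set<Long> pendingIds = new HashSet<>();

    void add(long id) {
        pendingIds.add(id);
    }

    void flush() {
        for (Long id : pendingIds) {
            System.out.println("committing " + id); // stand-in for the real per-item work
        }
        pendingIds.clear(); // everything processed; the batch is ready to be refilled
    }
}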

From source file:org.apache.ddlutils.model.Database.java

/**
 * Initializes the model by establishing the relationships between elements in this model encoded
 * e.g. in foreign keys etc. Also checks that the model elements are valid (table and columns have
 * a name, foreign keys reference existing tables etc.)
 */
public void initialize() throws ModelException {
    // we have to setup
    // * target tables in foreign keys
    // * columns in foreign key references
    // * columns in indices
    // * columns in uniques
    HashSet namesOfProcessedTables = new HashSet();
    HashSet namesOfProcessedColumns = new HashSet();
    HashSet namesOfProcessedFks = new HashSet();
    HashSet namesOfProcessedIndices = new HashSet();
    int tableIdx = 0;

    if ((getName() == null) || (getName().length() == 0)) {
        throw new ModelException("The database model has no name");
    }

    for (Iterator tableIt = _tables.iterator(); tableIt.hasNext(); tableIdx++) {
        Table curTable = (Table) tableIt.next();

        if ((curTable.getName() == null) || (curTable.getName().length() == 0)) {
            throw new ModelException("The table nr. " + tableIdx + " has no name");
        }
        if (namesOfProcessedTables.contains(curTable.getName())) {
            throw new ModelException("There are multiple tables with the name " + curTable.getName());
        }
        namesOfProcessedTables.add(curTable.getName());

        namesOfProcessedColumns.clear();
        namesOfProcessedFks.clear();
        namesOfProcessedIndices.clear();

        for (int idx = 0; idx < curTable.getColumnCount(); idx++) {
            Column column = curTable.getColumn(idx);

            if ((column.getName() == null) || (column.getName().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no name");
            }
            if (namesOfProcessedColumns.contains(column.getName())) {
                throw new ModelException("There are multiple columns with the name " + column.getName()
                        + " in the table " + curTable.getName());
            }
            namesOfProcessedColumns.add(column.getName());

            if ((column.getType() == null) || (column.getType().length() == 0)) {
                throw new ModelException(
                        "The column nr. " + idx + " in table " + curTable.getName() + " has no type");
            }
            if ((column.getTypeCode() == Types.OTHER) && !"OTHER".equalsIgnoreCase(column.getType())) {
                throw new ModelException("The column nr. " + idx + " in table " + curTable.getName()
                        + " has an unknown type " + column.getType());
            }
            namesOfProcessedColumns.add(column.getName());
        }

        for (int idx = 0; idx < curTable.getForeignKeyCount(); idx++) {
            ForeignKey fk = curTable.getForeignKey(idx);
            String fkName = (fk.getName() == null ? "" : fk.getName());
            String fkDesc = (fkName.length() == 0 ? "nr. " + idx : fkName);

            if (fkName.length() > 0) {
                if (namesOfProcessedFks.contains(fkName)) {
                    throw new ModelException("There are multiple foreign keys in table " + curTable.getName()
                            + " with the name " + fkName);
                }
                namesOfProcessedFks.add(fkName);
            }

            if (fk.getForeignTable() == null) {
                Table targetTable = findTable(fk.getForeignTableName(), true);

                if (targetTable == null) {
                    final String msg = String.format(
                            "The foreignkey [%s] in table [%s] references the undefined table [%s]. Will be ignored!",
                            fkDesc, curTable.getName(), fk.getForeignTableName());
                    _log.debug(msg);
                    continue;
                    //throw new ModelException(msg);
                } else {
                    fk.setForeignTable(targetTable);
                }
            }
            if (fk.getReferenceCount() == 0) {
                throw new ModelException("The foreignkey " + fkDesc + " in table " + curTable.getName()
                        + " does not have any references");
            }
            for (int refIdx = 0; refIdx < fk.getReferenceCount(); refIdx++) {
                Reference ref = fk.getReference(refIdx);

                if (ref.getLocalColumn() == null) {
                    Column localColumn = curTable.findColumn(ref.getLocalColumnName(), true);

                    if (localColumn == null) {
                        throw new ModelException("The foreignkey " + fkDesc + " in table " + curTable.getName()
                                + " references the undefined local column " + ref.getLocalColumnName());
                    } else {
                        ref.setLocalColumn(localColumn);
                    }
                }
                if (ref.getForeignColumn() == null) {
                    Column foreignColumn = fk.getForeignTable().findColumn(ref.getForeignColumnName(), true);

                    if (foreignColumn == null) {
                        throw new ModelException("The foreignkey " + fkDesc + " in table " + curTable.getName()
                                + " references the undefined local column " + ref.getForeignColumnName()
                                + " in table " + fk.getForeignTable().getName());
                    } else {
                        ref.setForeignColumn(foreignColumn);
                    }
                }
            }
        }

        for (int idx = 0; idx < curTable.getIndexCount(); idx++) {
            Index index = curTable.getIndex(idx);
            String indexName = (index.getName() == null ? "" : index.getName());
            String indexDesc = (indexName.length() == 0 ? "nr. " + idx : indexName);

            if (indexName.length() > 0) {
                if (namesOfProcessedIndices.contains(indexName)) {
                    throw new ModelException("There are multiple indices in table " + curTable.getName()
                            + " with the name " + indexName);
                }
                namesOfProcessedIndices.add(indexName);
            }
            if (index.getColumnCount() == 0) {
                throw new ModelException("The index " + indexDesc + " in table " + curTable.getName()
                        + " does not have any columns");
            }

            for (int indexColumnIdx = 0; indexColumnIdx < index.getColumnCount(); indexColumnIdx++) {
                IndexColumn indexColumn = index.getColumn(indexColumnIdx);
                Column column = curTable.findColumn(indexColumn.getName(), true);

                if (column == null) {
                    throw new ModelException("The index " + indexDesc + " in table " + curTable.getName()
                            + " references the undefined column " + indexColumn.getName());
                } else {
                    indexColumn.setColumn(column);
                }
            }
        }
    }
}
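
Both Database.initialize() implementations clear their per-table scratch sets at the top of each table iteration and then pair contains() with add() to detect duplicate names. A compact sketch of the same scoped duplicate check, using the boolean return value of add() in place of a separate contains() call (the names are illustrative):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DuplicateNameCheck {
    static void validate(List<List<String>> columnNamesPerTable) {
        Set<String> seenInTable = new HashSet<>();
        for (List<String> columnNames : columnNamesPerTable) {
            seenInTable.clear(); // duplicate detection is scoped to a single table
            for (String name : columnNames) {
                if (!seenInTable.add(name)) { // add() returns false if the name was already present
                    throw new IllegalStateException("duplicate column: " + name);
                }
            }
        }
    }
}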

From source file:org.apache.ddlutils.io.TestMisc.java

/**
 * Tests the backup and restore of several tables with complex relationships with an identity column and a foreign key to
 * itself while identity override is off.
 */
public void testComplexTableModel() throws Exception {
    // A: self-reference (A1->A2)
    // B: self- and foreign-reference (B1->B2|G1, B2->G2)
    // C: circular reference involving more than one table (C1->D1,C2->D2)
    // D: foreign-reference to F (D1->F1,D2)
    // E: isolated table (E1)
    // F: foreign-reference to C (F1->C2)
    // G: no references (G1, G2)

    final String modelXml = "<?xml version='1.0' encoding='ISO-8859-1'?>\n" + "<database xmlns='"
            + DatabaseIO.DDLUTILS_NAMESPACE + "' name='roundtriptest'>\n" + "  <table name='A'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + "    <column name='fk' type='INTEGER' required='false'/>\n"
            + "    <foreign-key name='AtoA' foreignTable='A'>\n"
            + "      <reference local='fk' foreign='pk'/>\n" + "    </foreign-key>\n" + "  </table>\n"
            + "  <table name='B'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + "    <column name='fk1' type='INTEGER' required='false'/>\n"
            + "    <column name='fk2' type='INTEGER' required='false'/>\n"
            + "    <foreign-key name='BtoB' foreignTable='B'>\n"
            + "      <reference local='fk1' foreign='pk'/>\n" + "    </foreign-key>\n"
            + "    <foreign-key name='BtoG' foreignTable='G'>\n"
            + "      <reference local='fk2' foreign='pk'/>\n" + "    </foreign-key>\n" + "  </table>\n"
            + "  <table name='C'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + "    <column name='fk' type='INTEGER' required='false'/>\n"
            + "    <foreign-key name='CtoD' foreignTable='D'>\n"
            + "      <reference local='fk' foreign='pk'/>\n" + "    </foreign-key>\n" + "  </table>\n"
            + "  <table name='D'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + "    <column name='fk' type='INTEGER' required='false'/>\n"
            + "    <foreign-key name='DtoF' foreignTable='F'>\n"
            + "      <reference local='fk' foreign='pk'/>\n" + "    </foreign-key>\n" + "  </table>\n"
            + "  <table name='E'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n" + "  </table>\n"
            + "  <table name='F'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n"
            + "    <column name='fk' type='INTEGER' required='false'/>\n"
            + "    <foreign-key name='FtoC' foreignTable='C'>\n"
            + "      <reference local='fk' foreign='pk'/>\n" + "    </foreign-key>\n" + "  </table>\n"
            + "  <table name='G'>\n"
            + "    <column name='pk' type='INTEGER' primaryKey='true' required='true'/>\n" + "  </table>\n"
            + "</database>";

    createDatabase(modelXml);

    getPlatform().setIdentityOverrideOn(true);

    // this is the optimal insertion order
    insertRow("E", new Object[] { new Integer(1) });
    insertRow("G", new Object[] { new Integer(1) });
    insertRow("G", new Object[] { new Integer(2) });
    insertRow("A", new Object[] { new Integer(2), null });
    insertRow("A", new Object[] { new Integer(1), new Integer(2) });
    insertRow("B", new Object[] { new Integer(2), null, new Integer(2) });
    insertRow("B", new Object[] { new Integer(1), new Integer(2), new Integer(1) });
    insertRow("D", new Object[] { new Integer(2), null });
    insertRow("C", new Object[] { new Integer(2), new Integer(2) });
    insertRow("F", new Object[] { new Integer(1), new Integer(2) });
    insertRow("D", new Object[] { new Integer(1), new Integer(1) });
    insertRow("C", new Object[] { new Integer(1), new Integer(1) });

    StringWriter stringWriter = new StringWriter();
    DatabaseDataIO dataIO = new DatabaseDataIO();

    dataIO.writeDataToXML(getPlatform(), getModel(), stringWriter, "UTF-8");

    String dataAsXml = stringWriter.toString();

    // the somewhat optimized order that DdlUtils currently generates is:
    // E1, G1, G2, A2, A1, B2, B1, C2, C1, D2, D1, F1
    // note that the order per table is the insertion order above
    SAXReader reader = new SAXReader();
    Document testDoc = reader.read(new InputSource(new StringReader(dataAsXml)));
    boolean uppercase = false;
    List rows = testDoc.selectNodes("/*/*");
    String pkColumnName = "pk";

    assertEquals(12, rows.size());
    if (!"e".equals(((Element) rows.get(0)).getName())) {
        assertEquals("E", ((Element) rows.get(0)).getName());
        uppercase = true;
    }
    if (!"pk".equals(((Element) rows.get(0)).attribute(0).getName())) {
        pkColumnName = pkColumnName.toUpperCase();
    }
    assertEquals("1", ((Element) rows.get(0)).attributeValue(pkColumnName));

    // we cannot be sure of the order in which the database returns the rows
    // per table (some return them in pk order, some in insertion order)
    // so we don't assume an order in this test
    HashSet pkValues = new HashSet();
    HashSet expectedValues = new HashSet(Arrays.asList(new String[] { "1", "2" }));

    assertEquals(uppercase ? "G" : "g", ((Element) rows.get(1)).getName());
    assertEquals(uppercase ? "G" : "g", ((Element) rows.get(2)).getName());
    pkValues.add(((Element) rows.get(1)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(2)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();

    assertEquals(uppercase ? "A" : "a", ((Element) rows.get(3)).getName());
    assertEquals(uppercase ? "A" : "a", ((Element) rows.get(4)).getName());
    pkValues.add(((Element) rows.get(3)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(4)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();

    assertEquals(uppercase ? "B" : "b", ((Element) rows.get(5)).getName());
    assertEquals(uppercase ? "B" : "b", ((Element) rows.get(6)).getName());
    pkValues.add(((Element) rows.get(5)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(6)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();

    assertEquals(uppercase ? "C" : "c", ((Element) rows.get(7)).getName());
    assertEquals(uppercase ? "C" : "c", ((Element) rows.get(8)).getName());
    pkValues.add(((Element) rows.get(7)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(8)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();

    assertEquals(uppercase ? "D" : "d", ((Element) rows.get(9)).getName());
    assertEquals(uppercase ? "D" : "d", ((Element) rows.get(10)).getName());
    pkValues.add(((Element) rows.get(9)).attributeValue(pkColumnName));
    pkValues.add(((Element) rows.get(10)).attributeValue(pkColumnName));
    assertEquals(pkValues, expectedValues);

    pkValues.clear();

    assertEquals(uppercase ? "F" : "f", ((Element) rows.get(11)).getName());
    assertEquals("1", ((Element) rows.get(11)).attributeValue(pkColumnName));

    dropDatabase();
    createDatabase(modelXml);

    StringReader stringReader = new StringReader(dataAsXml);

    dataIO.writeDataToDatabase(getPlatform(), getModel(), new Reader[] { stringReader });

    assertEquals(2, getRows("A").size());
    assertEquals(2, getRows("B").size());
    assertEquals(2, getRows("C").size());
    assertEquals(2, getRows("D").size());
    assertEquals(1, getRows("E").size());
    assertEquals(1, getRows("F").size());
    assertEquals(2, getRows("G").size());
}
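
The test above reuses one HashSet for every per-table check: it collects the two primary-key attribute values, compares the set against the expected values, then calls clear() before moving to the next table. Below is a minimal, self-contained sketch of that collect-compare-clear pattern; the class name and the hard-coded row data are illustrative, not taken from the DdlUtils test.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class ClearAndReuseDemo {
    public static void main(String[] args) {
        Set<String> expected = new HashSet<String>(Arrays.asList("1", "2"));
        Set<String> actual = new HashSet<String>();

        // Rows may come back in pk order or in insertion order, so collect
        // them into a set and compare order-insensitively per table.
        for (List<String> tableRows : Arrays.asList(
                Arrays.asList("1", "2"),     // e.g. pk order
                Arrays.asList("2", "1"))) {  // e.g. insertion order
            actual.addAll(tableRows);
            if (!actual.equals(expected)) {
                throw new AssertionError("unexpected pk values: " + actual);
            }
            actual.clear(); // empty the set so it can be reused for the next table
        }
        System.out.println("all tables matched");
    }
}

Reusing one set this way keeps the test free of per-table allocations and makes the order-insensitive intent explicit.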

From source file:org.biopax.ols.impl.BaseOBO2AbstractLoader.java

/**
 * internal helper method to create TermPathBeans for a given term. This method will
 * precompute all paths from a parent to all its children for the 3 major relationship types:
 * IS_A, PART_OF and DEVELOPS_FROM. The PART_OF and DEVELOPS_FROM relations can traverse IS_A
 * relations for maximal completeness and still be semantically correct, but IS_A relationships
 * cannot traverse other relation types.
 * <pre>
 *        term1
 *            |_ child1        child1 IS_A term1
 *            |_ child2        child2 IS_A term1
 *                             subject pred object
 * </pre>
 *
 * @param obj - the OBOEdit term object to extract information from
 * @param trm - the OLS parent term to link to
 * @return a Collection of valid TermPathBeans
 */
private Collection<TermPath> processPaths(OBOObject obj, TermBean trm) {

    HashSet<TermPath> retval = new HashSet<TermPath>();

    HashMap<String, Integer> paths = parser.computeChildPaths(1, IS_A_SET, obj);
    retval.addAll(createTermPathBeans(paths, Constants.IS_A_RELATION_TYPE_ID, IS_A, trm));

    //the part_of relation can traverse is_a relations to generate term_paths
    //so the set passed to computeChildPaths needs to contain both PART_OF and IS_A labels.
    HashSet<String> traversingSet = new HashSet<String>();
    traversingSet.addAll(PART_OF_SET);
    traversingSet.addAll(IS_A_SET);
    paths = parser.computeChildPaths(1, traversingSet, obj);
    retval.addAll(createTermPathBeans(paths, Constants.PART_OF_RELATION_TYPE_ID, PART_OF, trm));

    //the dev_from relation can traverse is_a relations to generate term_paths
    //so the set passed to computeChildPaths needs to contain both DEV_FROM and IS_A labels.
    traversingSet.clear();
    traversingSet.addAll(DEV_FROM_SET);
    traversingSet.addAll(IS_A_SET);
    paths = parser.computeChildPaths(1, traversingSet, obj);
    retval.addAll(createTermPathBeans(paths, Constants.DEVELOPS_FROM_RELATION_TYPE_ID, DEVELOPS_FROM, trm));

    return retval;
}
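
Note how processPaths builds the traversal label set once and clear()s it between relation types instead of allocating a fresh HashSet for each union. A standalone sketch of that reuse pattern follows; the label strings and print statements are stand-ins for the parser.computeChildPaths calls above.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class TraversalSetReuse {
    public static void main(String[] args) {
        Set<String> isA = new HashSet<String>(Arrays.asList("is_a"));
        Set<String> partOf = new HashSet<String>(Arrays.asList("part_of"));
        Set<String> devFrom = new HashSet<String>(Arrays.asList("develops_from"));

        Set<String> traversing = new HashSet<String>();

        // part_of paths may traverse is_a edges, so compute with the union
        traversing.addAll(partOf);
        traversing.addAll(isA);
        System.out.println("part_of traversal labels: " + traversing);

        // reuse the same set for develops_from: clear, then build the new union
        traversing.clear();
        traversing.addAll(devFrom);
        traversing.addAll(isA);
        System.out.println("develops_from traversal labels: " + traversing);
    }
}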

From source file:org.eclipse.rdf4j.sail.solr.SolrIndexTest.java

@Test
public void testAddMultiple() throws Exception {
    // add a statement to an index
    HashSet<Statement> added = new HashSet<Statement>();
    HashSet<Statement> removed = new HashSet<Statement>();
    added.add(statement11);
    added.add(statement12);
    added.add(statement21);
    added.add(statement22);
    index.begin();
    index.addRemoveStatements(added, removed);
    index.commit();

    // check that it arrived properly
    long count = client.query(new SolrQuery("*:*").setRows(0)).getResults().getNumFound();
    assertEquals(2, count);

    // check the documents
    SearchDocument document = index.getDocuments(subject).iterator().next();
    assertEquals(subject.toString(), document.getResource());
    assertStatement(statement11, document);
    assertStatement(statement12, document);

    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.getResource());
    assertStatement(statement21, document);
    assertStatement(statement22, document);

    // check if the text field stores all added string values
    Set<String> texts = new HashSet<String>();
    texts.add("cats");
    texts.add("dogs");
    // FIXME
    // assertTexts(texts, document);

    // add/remove one
    added.clear();
    removed.clear();
    added.add(statement23);
    removed.add(statement22);
    index.begin();
    index.addRemoveStatements(added, removed);
    index.commit();

    // check doc 2
    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.getResource());
    assertStatement(statement21, document);
    assertStatement(statement23, document);
    assertNoStatement(statement22, document);

    // check if the text field stores all added and no deleted string values
    texts.remove("dogs");
    texts.add("chicken");
    // FIXME
    // assertTexts(texts, document);

    // TODO: check deletion of the rest

}
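
The second half of the test reuses the added/removed sets across commits: clear() both, repopulate them with the next batch, and hand them to addRemoveStatements again. A minimal sketch of that batching pattern using plain collections (no Solr dependency; applyBatch is an illustrative stand-in for index.addRemoveStatements):

import java.util.HashSet;
import java.util.Set;

public class BatchReuseDemo {
    // stand-in for index.addRemoveStatements(added, removed)
    static void applyBatch(Set<String> added, Set<String> removed, Set<String> index) {
        index.addAll(added);
        index.removeAll(removed);
    }

    public static void main(String[] args) {
        Set<String> index = new HashSet<String>();
        Set<String> added = new HashSet<String>();
        Set<String> removed = new HashSet<String>();

        // first batch: add two statements
        added.add("s11");
        added.add("s12");
        applyBatch(added, removed, index);

        // reuse the same sets for the second batch
        added.clear();
        removed.clear();
        added.add("s23");
        removed.add("s12");
        applyBatch(added, removed, index);

        System.out.println(index); // contains s11 and s23
    }
}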

From source file:pltag.parser.Lexicon.java

public void getFamily(String string) {
    HashMap<String, Integer> similars = new HashMap<String, Integer>();
    int most = 0;
    HashSet<String> mostSimilar = new HashSet<String>();
    for (String tree : (Collection<String>) lexEntriesTree.getCollection(string)) {
        if (!this.noOfTrees.containsKey(tree) || noOfTrees.get(tree) < 5) {
            continue;
        }
        String t = tree.substring(tree.indexOf("\t") + 1);
        for (String assoc : trees.getCollection(t)) {
            if (assoc.contains(" unk") || assoc.contains(string)) {
                continue;
            }
            if (similars.containsKey(assoc)) {
                int newNum = similars.get(assoc) + 1;
                similars.put(assoc, newNum);
                if (newNum > most) {
                    most = newNum;
                    // a strictly higher count invalidates the previous best set
                    mostSimilar.clear();
                    mostSimilar.add(assoc);
                } else if (newNum == most) {
                    // tie with the current maximum: keep all equally frequent entries
                    mostSimilar.add(assoc);
                }
            } else {
                similars.put(assoc, 1);
            }
        }
    }
    HashSet<String> simtrees = new HashSet<String>();
    for (String mostSimWords : mostSimilar) {
        simtrees.addAll((Collection<String>) lexEntriesTree.getCollection(mostSimWords));
    }
    System.out.println(mostSimilar + "\t");
    System.out.print(simtrees.toString() + "\n");
}
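
The loop above is an "argmax set" pattern: `most` holds the best count seen so far, `mostSimilar` holds every association that reaches it, and mostSimilar.clear() discards the old winners whenever a strictly higher count appears. A self-contained sketch of the same pattern over simple word counts (the input words are illustrative):

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class ArgmaxSetDemo {
    public static void main(String[] args) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String w : Arrays.asList("a", "b", "b", "c", "c")) {
            Integer n = counts.get(w);
            counts.put(w, n == null ? 1 : n + 1);
        }

        int most = 0;
        Set<String> best = new HashSet<String>();
        for (Map.Entry<String, Integer> e : counts.entrySet()) {
            if (e.getValue() > most) {
                most = e.getValue();
                best.clear();          // a new maximum invalidates the old winners
                best.add(e.getKey());
            } else if (e.getValue() == most) {
                best.add(e.getKey()); // tie: keep all items with the maximal count
            }
        }
        System.out.println(best); // b and c, in some order
    }
}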