org.openhie.openempi.matching.fellegisunter.ProbabilisticMatchingServiceBase.java Source code

Java tutorial

Introduction

Here is the source code for org.openhie.openempi.matching.fellegisunter.ProbabilisticMatchingServiceBase.java

Source

/**
 *
 *  Copyright (C) 2010 SYSNET International, Inc. <support@sysnetint.com>
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 *  implied. See the License for the specific language governing
 *  permissions and limitations under the License.
 *
 */
package org.openhie.openempi.matching.fellegisunter;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.builder.EqualsBuilder;
import org.openhie.openempi.ApplicationException;
import org.openhie.openempi.Constants;
import org.openhie.openempi.InitializationException;
import org.openhie.openempi.configuration.ConfigurationRegistry;
import org.openhie.openempi.configuration.AdminConfiguration.ComponentType;
import org.openhie.openempi.context.Context;
import org.openhie.openempi.dao.PersonLinkDao;
import org.openhie.openempi.dao.hibernate.UniversalDaoHibernate;
import org.openhie.openempi.matching.AbstractMatchingService;
import org.openhie.openempi.matching.fellegisunter.MatchConfiguration.FieldQuerySelector;
import org.openhie.openempi.model.ColumnInformation;
import org.openhie.openempi.model.ColumnMatchInformation;
import org.openhie.openempi.model.ComparisonVector;
import org.openhie.openempi.model.Dataset;
import org.openhie.openempi.model.LeanRecordPair;
import org.openhie.openempi.model.Person;
import org.openhie.openempi.model.PersonLink;
import org.openhie.openempi.model.PersonMatch;
import org.openhie.openempi.model.User;
import org.openhie.openempi.recordlinkage.configuration.PrivacySettings;
import org.openhie.openempi.service.PersonManagerService;
import org.openhie.openempi.stringcomparison.DistanceMetricType;
import org.openhie.openempi.stringcomparison.StringComparisonService;
import org.openhie.openempi.util.GeneralUtil;
import org.openhie.openempi.util.SerializationUtil;

public abstract class ProbabilisticMatchingServiceBase extends AbstractMatchingService {
    // Current Fellegi-Sunter model parameters; assigned by linkRecords(...) and by init(Map).
    protected FellegiSunterParameters fellegiSunterParams;
    // DAO exposed through getPersonLinkDao()/setPersonLinkDao(); not otherwise used in this class.
    protected PersonLinkDao personLinkDao;
    // Floor applied by adjustMinimumValue() to the m/u probabilities (and their complements)
    // before calculateRecordPairWeights() takes their log ratio.
    private static final double MIN_MARGINAL_VALUE = 0.000001;

    /**
     * No-argument initialization hook; only emits a debug log entry.
     * The Fellegi-Sunter parameters are loaded by {@link #init(Map)} instead.
     */
    public void init() {
        log.debug("ProbabilisticMatchingServiceBase init");
    }

    /**
     * Flag supplied by the concrete matching service. In this class it is only forwarded to
     * {@code ComparisonVector.calculateProbabilityGivenMatch/Nonmatch} from
     * {@link #calculateMarginalProbabilities}.
     *
     * @return the value passed as the second argument of those probability calculations
     */
    protected abstract boolean useBinaryScores();

    /**
     * Finds the candidate record pairs for a person and returns those classified as matches.
     * <p>
     * Candidates come from the blocking service selector, are weighted with the current
     * Fellegi-Sunter parameters, and a pair is accepted when its weight is greater than or
     * equal to the upper bound. Pairs between the lower and upper bound (possible matches)
     * are currently not collected anywhere.
     *
     * @param blockingServiceTypeName name of the blocking service used to find candidates
     * @param leftTableName left table handed to the blocking service
     * @param rightTableName right table handed to the blocking service
     * @param person the person to find matches for
     * @return the set of record pairs whose weight reaches the upper bound
     * @throws ApplicationException if the matching service has not been initialized
     */
    public Set<LeanRecordPair> match(String blockingServiceTypeName, String leftTableName, String rightTableName,
            Person person) throws ApplicationException {
        log.debug("Looking for matches on person " + person);
        if (!initialized) {
            throw new ApplicationException("Matching service has not been initialized yet.");
        }

        List<LeanRecordPair> candidates = Context.getBlockingServiceSelector().findCandidates(
                blockingServiceTypeName, leftTableName, rightTableName, person);
        calculateRecordPairWeights(candidates, fellegiSunterParams);

        // Fellegi-Sunter decision rule: only pairs at or above the upper bound are matches.
        // TODO: pairs strictly between the lower and the upper bound are possible matches
        // and would need to be queued for review.
        Set<LeanRecordPair> accepted = new java.util.HashSet<LeanRecordPair>();
        for (LeanRecordPair candidate : candidates) {
            if (candidate.getWeight() >= fellegiSunterParams.getUpperBound()) {
                accepted.add(candidate);
            }
        }
        return accepted;
    }

    /**
     * Runs a probabilistic linkage between two tables and persists its outcome.
     * <p>
     * Steps, in order:
     * <ol>
     * <li>Looks up the {@code MatchConfiguration}, creates fresh Fellegi-Sunter parameters and
     * asks the blocking service (through {@link #getRecordPairs}) for the record pairs.</li>
     * <li>Sets mu/lambda from the configured false positive/negative probabilities, computes
     * vector frequencies (unless {@code emOnly}) and calls
     * {@link #estimateMarginalProbabilities}.</li>
     * <li>Unless {@code emOnly}: weights the pairs, computes their vector probabilities and,
     * when this is not a single-field CBF match, sorts the pairs by weight and derives the
     * lower and upper bounds.</li>
     * <li>Serializes the parameters into the file repository directory and marks the service
     * as initialized.</li>
     * <li>Builds and stores a {@code PersonMatch} with one {@code ColumnMatchInformation} per
     * match field; in parameter manager mode this includes the Bloom filter length
     * proposals.</li>
     * <li>Unless {@code emOnly}: creates the link table, optionally stores the classified links
     * in pages of {@code Constants.PAGE_SIZE}, and adds indexes and constraints.</li>
     * </ol>
     *
     * @param blockingServiceTypeName forwarded to the blocking service selector
     * @param blockingServiceCustomParameters forwarded to the blocking service selector
     * @param matchingServiceTypeName forwarded to the blocking service selector
     * @param matchingServiceCustomParameters forwarded to the blocking service selector
     * @param linkTableName name of the link table; also used as match title and file name prefix
     * @param leftTableName table name of the left dataset
     * @param rightTableName table name of the right dataset
     * @param pairsParam list handed to the blocking service; may be {@code null}, in which case
     *            a new list is created unless {@code emOnly} is set
     * @param componentType {@code PARAMETER_MANAGER_MODE} enables the Bloom filter sizing
     * @param emOnly when {@code true} only the parameter estimation is performed: no weights,
     *            bounds, link table or links are produced
     * @param persistLinks when {@code true} (and not {@code emOnly}) the links are stored
     * @return the {@code PersonMatch} as returned by the person manager service
     * @throws ApplicationException if column information is missing for a match field, if the
     *             left and right column information disagree, or if the comparison function
     *             input type is not compatible with the field type
     */
    public PersonMatch linkRecords(String blockingServiceTypeName, Object blockingServiceCustomParameters,
            String matchingServiceTypeName, Object matchingServiceCustomParameters, String linkTableName,
            String leftTableName, String rightTableName, List<LeanRecordPair> pairsParam,
            ComponentType componentType, boolean emOnly, boolean persistLinks) throws ApplicationException {
        long startTime = System.currentTimeMillis();
        log.warn("Link Start: " + startTime);
        long totalMem = Runtime.getRuntime().totalMemory();
        long freeMem = Runtime.getRuntime().freeMemory();
        log.warn("Used memory = " + (totalMem - freeMem) + " total (" + totalMem + ") - free (" + freeMem + ")");
        MatchConfiguration matchConfig = (MatchConfiguration) Context.getConfiguration().lookupConfigurationEntry(
                ProbabilisticMatchingConstants.PROBABILISTIC_MATCHING_CONFIGURATION_REGISTRY_KEY);
        List<MatchField> matchFields = matchConfig.getMatchFields(FieldQuerySelector.MatchOnlyFields);
        int fieldCount = matchFields.size();
        fellegiSunterParams = new FellegiSunterParameters(fieldCount, 2);
        List<LeanRecordPair> pairs = pairsParam;
        if (pairs == null && !emOnly)
            pairs = new ArrayList<LeanRecordPair>();
        getRecordPairs(blockingServiceTypeName, blockingServiceCustomParameters, matchingServiceTypeName,
                matchingServiceCustomParameters, leftTableName, rightTableName, pairs, emOnly, fellegiSunterParams);
        // In EM-only mode the caller may pass no list at all, so the pair list can still be null
        // here; every later use that is reachable in that mode goes through pairCount.
        int pairCount = (pairs == null) ? 0 : pairs.size();
        fellegiSunterParams.setMu(matchConfig.getFalsePositiveProbability());
        fellegiSunterParams.setLambda(matchConfig.getFalseNegativeProbability());

        if (!emOnly) {
            //         scoreRecordPairs(pairs, fellegiSunterParams, true);
            calculateVectorFrequencies(pairs, fellegiSunterParams);
        }
        estimateMarginalProbabilities(fellegiSunterParams, matchConfig, pairCount);
        long blockNfsTime = System.currentTimeMillis();
        // Defaults to the previous checkpoint so the timing trace stays meaningful in EM-only mode.
        long fileOutTime = blockNfsTime;
        log.trace("Fellegi Sunter Parameters:\n" + fellegiSunterParams);
        String fileRepoDir = Context.getConfiguration().getAdminConfiguration().getFileRepositoryDirectory();
        boolean parameterManagerMode = (componentType == ComponentType.PARAMETER_MANAGER_MODE);
        // A "CBF match" is a run outside parameter manager mode whose only match field is the
        // CBF attribute; for it no ordering and no bound calculation is done.
        boolean cbfMatch = (!parameterManagerMode && matchFields.size() == 1
                && matchFields.get(0).getLeftFieldName().equals(UniversalDaoHibernate.CBF_ATTRIBUTE_NAME));
        if (!emOnly) {
            calculateRecordPairWeights(pairs, fellegiSunterParams);
            calculateMarginalProbabilities(pairs, fellegiSunterParams, false, fileRepoDir + "/" + linkTableName);
            fileOutTime = System.currentTimeMillis();
            if (!cbfMatch) {
                orderRecordPairsByWeight(pairs, false, fileRepoDir + "/" + linkTableName);
                calculateLowerBound(pairs, fellegiSunterParams);
                calculateUpperBound(pairs, fellegiSunterParams);
            }
        }
        SerializationUtil.serializeObject(fileRepoDir,
                linkTableName + "_" + Constants.FELLEGI_SUNTER_CONFIG_FILE_NAME, fellegiSunterParams);
        initialized = true;

        PersonManagerService personManagerService = Context.getPersonManagerService();
        Dataset leftDataset = personManagerService.getDatasetByTableName(leftTableName);
        Dataset rightDataset = personManagerService.getDatasetByTableName(rightTableName);
        PersonMatch personMatch = new PersonMatch();
        personMatch.setMatchTitle(linkTableName);
        personMatch.setLeftDataset(leftDataset);
        personMatch.setRightDataset(rightDataset);
        personMatch.setMatchConfiguration(matchConfig);
        personMatch.setMatchFellegiSunter(fellegiSunterParams);
        personMatch.setTotalRecords(Long.valueOf(pairCount));
        List<ColumnMatchInformation> columnMatchInformation = new ArrayList<ColumnMatchInformation>();
        // Index the column metadata of both datasets by field name for the per-field lookup below.
        Map<String, ColumnInformation> leftColumnInformationHash = new HashMap<String, ColumnInformation>();
        for (ColumnInformation ci : leftDataset.getColumnInformation()) {
            leftColumnInformationHash.put(ci.getFieldName(), ci);
        }
        Map<String, ColumnInformation> rightColumnInformationHash = new HashMap<String, ColumnInformation>();
        for (ColumnInformation ci : rightDataset.getColumnInformation()) {
            rightColumnInformationHash.put(ci.getFieldName(), ci);
        }
        // i indexes the m/u values; it only advances for fields that are actually compared.
        int i = 0;
        long totalRecords = 0;
        double downweightedRangeSum = 0.0;
        if (parameterManagerMode && !emOnly)
            totalRecords = leftDataset.getTotalRecords() + rightDataset.getTotalRecords();
        StringComparisonService comparisonService = Context.getStringComparisonService();
        log.debug("Constructing ColumnMatchInformations...");
        for (MatchField matchField : matchFields) {
            ColumnMatchInformation cmi = new ColumnMatchInformation();
            cmi.setLeftFieldName(matchField.getLeftFieldName());
            cmi.setRightFieldName(matchField.getRightFieldName());
            cmi.setPersonMatch(personMatch);
            ColumnInformation leftCi = leftColumnInformationHash.get(matchField.getLeftFieldName());
            ColumnInformation rightCi = rightColumnInformationHash.get(matchField.getRightFieldName());
            // Report a match field without column metadata explicitly instead of failing with a
            // NullPointerException in the comparison below.
            if (leftCi == null || rightCi == null) {
                throw new ApplicationException("Column information is missing for match field; left field: "
                        + matchField.getLeftFieldName() + " (" + leftCi + "), right field: "
                        + matchField.getRightFieldName() + " (" + rightCi + ")");
            }
            if (!(new EqualsBuilder()
                    .append(leftCi.getFieldType().getFieldTypeEnum(), rightCi.getFieldType().getFieldTypeEnum())
                    .append(leftCi.getFieldTypeModifier(), rightCi.getFieldTypeModifier())
                    .append(leftCi.getFieldMeaning().getFieldMeaningEnum(),
                            rightCi.getFieldMeaning().getFieldMeaningEnum())
                    .append(leftCi.getFieldTransformation(), rightCi.getFieldTransformation())
                    .append(leftCi.getBloomFilterKParameter(), rightCi.getBloomFilterKParameter())
                    .append(leftCi.getBloomFilterMParameter(), rightCi.getBloomFilterMParameter()).isEquals())) {
                throw new ApplicationException(
                        "Field informations don't match; left: " + leftCi + ", right: " + rightCi);
            }
            cmi.setFieldType(leftCi.getFieldType().getFieldTypeEnum());
            //cmi.getFieldType();   // to load FieldType before saving
            String funcName = matchField.getComparatorFunction().getFunctionName();
            boolean noMatchField = (funcName.equals(Constants.NO_COMPARISON_JUST_TRANSFER_FUNCTION_NAME));
            DistanceMetricType distanceMetricType = comparisonService.getDistanceMetricType(funcName);
            if (distanceMetricType.getDistanceMetric().getInputType() != cmi.getFieldType().getFieldTypeEnum())
                throw new ApplicationException(
                        "Field types (" + distanceMetricType.getDistanceMetric().getInputType()
                                + ") are not compatible with the comparison function input type ("
                                + cmi.getFieldType().getFieldTypeEnum() + ")");
            cmi.setFieldTypeModifier(leftCi.getFieldTypeModifier());
            cmi.setFieldMeaning(leftCi.getFieldMeaning().getFieldMeaningEnum());
            cmi.getFieldMeaning(); // to load FieldType before saving
            cmi.setComparisonFunctionName(funcName);
            if (!noMatchField) {
                cmi.setFellegiSunterMValue(fellegiSunterParams.getMValue(i));
                cmi.setFellegiSunterUValue(fellegiSunterParams.getUValue(i));
                if (!emOnly) {
                    if (parameterManagerMode) {
                        // NOTE(review): totalRecords is 0 when both datasets are empty, which makes
                        // percentMissing and n below NaN/Infinity - confirm this cannot happen.
                        double percentMissing = (Double.valueOf(leftCi.getNumberOfMissing())
                                + Double.valueOf(rightCi.getNumberOfMissing())) / Double.valueOf(totalRecords);
                        cmi.setPercentMissing(percentMissing);
                        double downweightedRange = cmi.getFellegiSunterWRange() * (1.0 - percentMissing);
                        cmi.setDownweightedRange(downweightedRange);
                        downweightedRangeSum += downweightedRange;
                        PrivacySettings privacySettings = (PrivacySettings) Context.getConfiguration()
                                .lookupConfigurationEntry(ConfigurationRegistry.RECORD_LINKAGE_PROTOCOL_SETTINGS);
                        // TODO: make p configurable
                        double p = 0.5; // Bloom filter fill factor
                        // Record-count weighted average field length of both datasets plus
                        // (n-gram size - 1).
                        double n = (leftDataset.getTotalRecords() * leftCi.getAverageFieldLength()
                                + rightDataset.getTotalRecords() * rightCi.getAverageFieldLength()) / totalRecords
                                + (privacySettings.getBloomfilterSettings().getNGramSize() - 1);
                        double kn = leftCi.getBloomFilterKParameter() * n;
                        double valueFromBloomFormula = Math.pow(p, 1 / kn); // root(kn,p)
                        double dynamicBFLength = 1 / (1 - valueFromBloomFormula);
                        cmi.setDynamicBloomFilterLength(dynamicBFLength);
                        cmi.setBloomFilterFinalM((int) dynamicBFLength);
                    } else {
                        if (leftCi.getBloomFilterMParameter() != null
                                && leftCi.getBloomFilterMParameter() != Integer.MIN_VALUE)
                            cmi.setBloomFilterFinalM(leftCi.getBloomFilterMParameter());
                    }
                }
            }
            log.debug(cmi.toStringLong());
            columnMatchInformation.add(cmi);
            if (!noMatchField)
                i++;
        }
        if (parameterManagerMode && !emOnly) {
            double maxPossibleLength = 0.0;
            log.debug("Calculating percentBits and BloomFilterPossibleM for ColumnMatchInformations...");
            // First pass: share of the summed downweighted range per field and the length the
            // whole filter would need for that field; remember the largest such length.
            for (ColumnMatchInformation cmi : columnMatchInformation) {
                if (!cmi.getComparisonFunctionName().equals(Constants.NO_COMPARISON_JUST_TRANSFER_FUNCTION_NAME)) {
                    double percentBits = cmi.getDownweightedRange() / downweightedRangeSum;
                    cmi.setPercentBits(percentBits);
                    double possibleLength = cmi.getDynamicBloomFilterLength() / percentBits;
                    cmi.setBloomFilterPossibleM(possibleLength);
                    if (possibleLength > maxPossibleLength)
                        maxPossibleLength = possibleLength;
                }
                log.debug(cmi.toStringLong());
            }
            log.debug("Calculating BloomFilterProposedM for ColumnMatchInformations...");
            // Second pass: distribute the largest possible length according to each field's share.
            for (ColumnMatchInformation cmi : columnMatchInformation) {
                if (!cmi.getComparisonFunctionName().equals(Constants.NO_COMPARISON_JUST_TRANSFER_FUNCTION_NAME)) {
                    double proposedLength = maxPossibleLength * cmi.getPercentBits();
                    cmi.setBloomFilterProposedM((int) proposedLength);
                }
                log.debug(cmi.toStringLong());
            }
        }
        personMatch.setColumnMatchInformation(columnMatchInformation);
        // TODO: remove this if user management will be better:
        User currentUser = Context.getUserContext().getUser();
        if (currentUser == null) // TODO: should not happen in theory
            currentUser = leftDataset.getOwner();
        personMatch.setUserCreatedBy(currentUser);
        personMatch = personManagerService.addPersonMatch(personMatch);

        long endTime = System.currentTimeMillis();
        log.warn("Link End, Link persist Start: " + endTime + ", elapsed: " + (endTime - startTime));
        long linkPersistStartTime = System.currentTimeMillis();
        totalMem = Runtime.getRuntime().totalMemory();
        freeMem = Runtime.getRuntime().freeMemory();
        log.warn("Used memory = " + (totalMem - freeMem) + " total (" + totalMem + ") - free (" + freeMem + ")");
        if (!emOnly) {
            personManagerService.createLinkTable(linkTableName, leftTableName, rightTableName, false);

            long beginIndex = 0L;
            long linkId = 0L;
            if (persistLinks) {
                int countMatched = 0, countUnmatched = 0, countUndecided = 0;
                int size = 1;
                // Store the links page by page; the loop ends once beginIndex has moved past the
                // end of the pair list.
                while (size > 0) {
                    int fromIndex = Math.min((int) beginIndex, pairs.size() - 1);
                    int toIndex = Math.min((int) beginIndex + Constants.PAGE_SIZE, pairs.size());
                    if ((int) beginIndex < pairs.size()) {
                        List<LeanRecordPair> subList = pairs.subList(fromIndex, toIndex);
                        size = subList.size();
                        if (size > 0) {
                            List<PersonLink> personLinks = new ArrayList<PersonLink>();
                            for (LeanRecordPair pair : subList) {
                                // weight <= lower bound: non-match; weight >= upper bound: match;
                                // anything strictly in between stays undecided.
                                int linkState = UniversalDaoHibernate.LINK_STATUS_UNDECIDED;
                                if (pair.getWeight() <= fellegiSunterParams.getLowerBound()) {
                                    countUnmatched++;
                                    linkState = UniversalDaoHibernate.LINK_STATUS_NONMATCH;
                                } else if (pair.getWeight() > fellegiSunterParams.getLowerBound()
                                        && pair.getWeight() < fellegiSunterParams.getUpperBound()) {
                                    countUndecided++;
                                } else {
                                    countMatched++;
                                    linkState = UniversalDaoHibernate.LINK_STATUS_MATCH;
                                }
                                PersonLink personLink = cbfMatch
                                        ? GeneralUtil.constructPersonLink(linkId++, pair.getLeftRecordId(),
                                                pair.getRightRecordId(), pair.getWeight(), linkState)
                                        : GeneralUtil.constructPersonLink(pair, linkId++, linkState);
                                personLinks.add(personLink);
                            }
                            personManagerService.addPersonLinks(linkTableName, personLinks);
                            beginIndex += Constants.PAGE_SIZE;
                        }
                    } else {
                        size = 0;
                    }
                }
                log.debug("We automatically matched " + countMatched + ", unmatched " + countUnmatched
                        + " and have undecided: " + countUndecided);
            }
            // NOTE(review): beginIndex is the page-aligned offset reached by the loop above (0 when
            // links are not persisted), not the number of links written - confirm this is the value
            // addIndexesAndConstraintsToLinkTable expects.
            personManagerService.addIndexesAndConstraintsToLinkTable(linkTableName, beginIndex + 1, leftTableName,
                    rightTableName);
        }
        long linkPersistEndTime = System.currentTimeMillis();
        totalMem = Runtime.getRuntime().totalMemory();
        freeMem = Runtime.getRuntime().freeMemory();
        log.warn("Used memory = " + (totalMem - freeMem) + " total (" + totalMem + ") - free (" + freeMem + ")");
        // All checkpoints come from System.currentTimeMillis(), so the durations are milliseconds.
        log.trace("ms of blocking + EM / FS: " + (blockNfsTime - startTime));
        log.trace("ms of file out + blocking + EM / FS: " + (fileOutTime - startTime));
        log.trace("ms from start to link persist: " + (linkPersistStartTime - startTime));
        log.trace("ms of link persist: " + (linkPersistEndTime - linkPersistStartTime));
        log.trace("ms total: " + (linkPersistEndTime - startTime));
        log.warn("Link persist End");

        return personMatch;
    }

    /**
     * Returns the Fellegi-Sunter parameters currently held by this service.
     *
     * @return the parameters last assigned by {@code linkRecords(...)} or {@code init(Map)};
     *         {@code null} if neither has assigned them yet
     */
    public FellegiSunterParameters getFellegiSunterParameters() {
        return this.fellegiSunterParams;
    }

    /**
     * Tries to load previously serialized Fellegi-Sunter parameters from the file repository
     * directory and marks the service as initialized when that succeeds.
     * <p>
     * Loading is best effort: a failure is logged and leaves the service uninitialized, it is
     * not propagated to the caller.
     * <p>
     * NOTE(review): the file is looked up under the plain
     * {@code Constants.FELLEGI_SUNTER_CONFIG_FILE_NAME}, while {@code linkRecords(...)} writes it
     * with a {@code linkTableName + "_"} prefix - confirm which file is expected to exist here.
     *
     * @param configParameters not used by this implementation
     * @throws InitializationException declared by the signature; never thrown by this
     *             implementation
     */
    public void init(Map<String, Object> configParameters) throws InitializationException {
        try {
            fellegiSunterParams = (FellegiSunterParameters) SerializationUtil.deserializeObject(
                    Context.getConfiguration().getAdminConfiguration().getFileRepositoryDirectory(),
                    Constants.FELLEGI_SUNTER_CONFIG_FILE_NAME);
            // Only report the service as usable when parameters were actually obtained.
            initialized = (fellegiSunterParams != null);
        } catch (Exception e) {
            initialized = false;
            // Keep the failure visible instead of swallowing it silently.
            log.warn("Unable to load the Fellegi-Sunter parameters; the matching service remains uninitialized.",
                    e);
        }
    }

    /**
     * Delegates to the blocking service selector obtained from {@code Context}, passing every
     * argument through unchanged.
     *
     * @param blockingServiceTypeName forwarded as is
     * @param blockingServiceCustomParameters forwarded as is
     * @param matchingServiceTypeName forwarded as is
     * @param matchingServiceCustomParameters forwarded as is
     * @param leftTableName forwarded as is
     * @param rightTableName forwarded as is
     * @param pairs list forwarded to the blocking service (presumably filled by it - verify
     *            against the blocking service); {@code linkRecords(...)} may pass {@code null}
     *            here in EM-only mode
     * @param emOnly forwarded as is
     * @param fellegiSunterParams forwarded as is
     * @throws ApplicationException declared by the signature; presumably raised by the blocking
     *             service selector - confirm against its contract
     */
    public void getRecordPairs(String blockingServiceTypeName, Object blockingServiceCustomParameters,
            String matchingServiceTypeName, Object matchingServiceCustomParameters, String leftTableName,
            String rightTableName, List<LeanRecordPair> pairs, boolean emOnly,
            FellegiSunterParameters fellegiSunterParams) throws ApplicationException {
        Context.getBlockingServiceSelector().getRecordPairs(blockingServiceTypeName,
                blockingServiceCustomParameters, matchingServiceTypeName, matchingServiceCustomParameters,
                leftTableName, rightTableName, pairs, emOnly, fellegiSunterParams);
    }

    /**
     * Recounts how often each binary comparison vector occurs among the given pairs.
     * All frequency slots are reset to zero first; afterwards the slot selected by each pair's
     * binary vector value is incremented once.
     *
     * @param pairs record pairs whose comparison vectors are counted
     * @param fellegiSunterParams parameter object that receives the frequencies
     */
    public void calculateVectorFrequencies(List<LeanRecordPair> pairs,
            FellegiSunterParameters fellegiSunterParams) {
        // Start from a clean histogram.
        int slot = 0;
        while (slot < fellegiSunterParams.getVectorCount()) {
            fellegiSunterParams.setVectorFrequency(slot, 0);
            slot++;
        }
        // One increment per pair, keyed by the binary value of its comparison vector.
        for (LeanRecordPair recordPair : pairs) {
            fellegiSunterParams.incrementVectorFrequency(
                    recordPair.getComparisonVector().getBinaryVectorValue());
        }
    }

    /**
     * Computes the weight of every record pair.
     * <p>
     * For each field the base-2 log ratios {@code log2(m/u)} and {@code log2((1-m)/(1-u))} are
     * precomputed into {@code wa} and {@code wd} (each operand is first raised to the minimum
     * marginal value), then {@link #calculateRecordPairWeight} is invoked for every pair. When
     * there is at most one field the precomputation is skipped and both arrays stay
     * zero-filled.
     *
     * @param pairs record pairs to weight
     * @param fellegiSunterParams source of the field count and the m/u values
     */
    public void calculateRecordPairWeights(List<LeanRecordPair> pairs,
            FellegiSunterParameters fellegiSunterParams) {
        final int fieldCount = fellegiSunterParams.getFieldCount();
        final double[] wa = new double[fieldCount];
        final double[] wd = new double[fieldCount];

        if (fieldCount > 1) {
            final double ln2 = Math.log(2.0);
            for (int field = 0; field < fellegiSunterParams.getFieldCount(); field++) {
                final double m = fellegiSunterParams.getMValue(field);
                final double u = fellegiSunterParams.getUValue(field);
                // Log ratio of the m and u values themselves.
                wa[field] = Math.log(adjustMinimumValue(m) / adjustMinimumValue(u)) / ln2;
                // Log ratio of their complements.
                wd[field] = Math.log(adjustMinimumValue(1.0 - m) / adjustMinimumValue(1.0 - u)) / ln2;
            }
        }

        for (LeanRecordPair recordPair : pairs) {
            calculateRecordPairWeight(recordPair, fellegiSunterParams, wa, wd);
        }
    }

    /**
     * Per-pair weighting step supplied by the concrete matching service; invoked once for each
     * pair by {@link #calculateRecordPairWeights}. Presumably stores the result as the pair's
     * weight - confirm against the implementations.
     *
     * @param pair the record pair to weight
     * @param fellegiSunterParams the parameters the weights are based on
     * @param wa per-field {@code log2(m/u)} values (all zero when there is at most one field)
     * @param wd per-field {@code log2((1-m)/(1-u))} values (all zero when there is at most one
     *            field)
     */
    protected abstract void calculateRecordPairWeight(LeanRecordPair pair,
            FellegiSunterParameters fellegiSunterParams, double wa[], double wd[]);

    /**
     * Raises a value to {@code MIN_MARGINAL_VALUE} when it lies below that floor.
     *
     * @param numerator the value to clamp from below
     * @return {@code MIN_MARGINAL_VALUE} if {@code numerator} is smaller than it, otherwise
     *         {@code numerator} unchanged
     */
    private double adjustMinimumValue(double numerator) {
        return (numerator < MIN_MARGINAL_VALUE) ? MIN_MARGINAL_VALUE : numerator;
    }

    /**
     * Sorts the given list in place with {@code RecordPairComparatorByWeight} and optionally
     * writes the weights, one per line and in sorted order, to a statistics file.
     * <p>
     * Writing the file is best effort: an I/O failure is logged and does not affect the
     * returned list.
     *
     * @param pairs record pairs to sort; the list itself is reordered
     * @param writeStat whether to write the weights file
     * @param pathPrefix prefix of the statistics file path; only used when {@code writeStat}
     *            is {@code true}
     * @return the same list instance, now sorted
     */
    public List<LeanRecordPair> orderRecordPairsByWeight(List<LeanRecordPair> pairs, boolean writeStat,
            String pathPrefix) {
        Collections.sort(pairs, new RecordPairComparatorByWeight());
        if (writeStat) {
            // NOTE(review): a space separates prefix and file name here, whereas
            // calculateMarginalProbabilities() uses "_" - confirm whether the space is intended.
            String weightsFile = pathPrefix + " " + Constants.WEIGHTS_FILE_NAME;
            try {
                BufferedWriter bw = new BufferedWriter(new FileWriter(weightsFile));
                try {
                    for (LeanRecordPair pair : pairs) {
                        bw.append(Double.toString(pair.getWeight())); // TODO: write some kind of ids
                        bw.newLine(); // System.getProperty("line.separator")
                    }
                } finally {
                    bw.close();
                }
            } catch (IOException ex) {
                // Report through the class logger instead of printing to stderr.
                log.error("Failed to write the record pair weights to " + weightsFile, ex);
            }
        }
        return pairs;
    }

    /**
     * Sorts the given list in place using {@code RecordPairComparatorByProbabilityGivenM}.
     *
     * @param pairs record pairs to sort; the list itself is reordered
     * @return the same list instance, now sorted
     */
    public List<LeanRecordPair> orderRecordPairsByProbabilityGivenM(List<LeanRecordPair> pairs) {
        final RecordPairComparatorByProbabilityGivenM byProbabilityGivenM =
                new RecordPairComparatorByProbabilityGivenM();
        Collections.sort(pairs, byProbabilityGivenM);
        return pairs;
    }

    /**
     * Sorts the given list in place using {@code RecordPairComparatorByProbabilityGivenU}.
     *
     * @param pairs record pairs to sort; the list itself is reordered
     * @return the same list instance, now sorted
     */
    public List<LeanRecordPair> orderRecordPairsByProbabilityGivenU(List<LeanRecordPair> pairs) {
        final RecordPairComparatorByProbabilityGivenU byProbabilityGivenU =
                new RecordPairComparatorByProbabilityGivenU();
        Collections.sort(pairs, byProbabilityGivenU);
        return pairs;
    }

    /**
     * Has every pair's comparison vector compute its probability given match (from the m
     * values) and given non-match (from the u values), and optionally writes one
     * {@code weight,probGivenM,probGivenU} line per pair to a statistics file.
     * <p>
     * The sums of both probabilities are only reported in a trace log entry. An I/O failure is
     * logged and ends the loop over the pairs.
     *
     * @param pairs record pairs whose comparison vectors are updated
     * @param fellegiSunterParams source of the m and u values
     * @param writeStat whether to write the statistics file
     * @param pathPrefix prefix of the statistics file path; only used when {@code writeStat}
     *            is {@code true}
     */
    public void calculateMarginalProbabilities(List<LeanRecordPair> pairs,
            FellegiSunterParameters fellegiSunterParams, boolean writeStat, String pathPrefix) {
        double mpsum = 0.0;
        double upsum = 0.0;

        try {
            BufferedWriter bw = null;
            if (writeStat) {
                bw = new BufferedWriter(
                        new FileWriter(pathPrefix + "_" + Constants.MARGINAL_PROBABILITIES_FILE_NAME));
            }
            try {
                for (LeanRecordPair pair : pairs) {
                    //               log.trace("Pair: " + getRecordPairMatchFields(pair));
                    ComparisonVector vector = pair.getComparisonVector();
                    vector.calculateProbabilityGivenMatch(fellegiSunterParams.getMValues(), useBinaryScores());
                    vector.calculateProbabilityGivenNonmatch(fellegiSunterParams.getUValues(), useBinaryScores());
                    mpsum += vector.getVectorProbGivenM();
                    upsum += vector.getVectorProbGivenU();
                    if (writeStat) {
                        bw.append(pair.getWeight() + "," + vector.getVectorProbGivenM() + ","
                                + vector.getVectorProbGivenU());
                        bw.newLine(); // System.getProperty("line.separator")
                    }
                }
            } finally {
                // The writer only exists when statistics were requested.
                if (bw != null)
                    bw.close();
            }
        } catch (IOException ex) {
            // Report through the class logger instead of printing to stderr.
            log.error("Failed to write the marginal probabilities file for prefix " + pathPrefix, ex);
        }
        log.trace("mpsum: " + mpsum + " upsum: " + upsum);
    }

    /**
     * Estimation step supplied by the concrete matching service; {@code linkRecords(...)} calls
     * it after the record pairs were requested and mu/lambda were set, and before any weights
     * are calculated. Presumably fills in the m and u values of the parameters - confirm
     * against the implementations.
     *
     * @param fellegiSunterParams the parameters to work on
     * @param matchConfig the probabilistic matching configuration
     * @param pairNumber the size of the record pair list as passed by {@code linkRecords(...)}
     */
    public abstract void estimateMarginalProbabilities(FellegiSunterParameters fellegiSunterParams,
            MatchConfiguration matchConfig, int pairNumber);

    /**
     * Derives the lower bound from the given pairs.
     * <p>
     * Walks the list from the front, accumulating each pair's vector probability given match,
     * and stops at the first pair where the sum exceeds lambda (or at the last pair). The
     * weight of that pair becomes the lower bound. {@code linkRecords(...)} sorts the list with
     * {@code orderRecordPairsByWeight} before calling this method.
     * <p>
     * When there are no pairs the lower bound is left unchanged.
     *
     * @param pairs record pairs in the order they should be scanned; may be empty
     * @param fellegiSunterParams provides lambda and receives the lower bound
     */
    public void calculateLowerBound(List<LeanRecordPair> pairs, FellegiSunterParameters fellegiSunterParams) {
        if (pairs == null || pairs.isEmpty()) {
            // Nothing to derive a bound from; avoid indexing into an empty list.
            log.debug("No record pairs available; the lower bound is left unchanged.");
            return;
        }
        double sum = 0;
        int index = 0;
        double probability = 0;
        for (LeanRecordPair pair : pairs) {
            probability = pair.getComparisonVector().getVectorProbGivenM();
            sum += probability;
            index++;
            if (sum > fellegiSunterParams.getLambda()) {
                break;
            }
        }
        // index was advanced past the pair that ended the scan, hence index - 1.
        log.debug("Sum: " + sum + ", lambda: " + fellegiSunterParams.getLambda() + ", index: " + (index - 1)
                + ", weight: " + pairs.get(index - 1).getWeight() + ", prob: " + probability);
        fellegiSunterParams.setLowerBound(pairs.get(index - 1).getWeight());
    }

    /**
     * Derives the upper bound from the given pairs.
     * <p>
     * Walks the list from the back, accumulating each pair's vector probability given
     * non-match, and stops at the first pair where the sum exceeds mu. The weight of that pair
     * becomes the upper bound; if the sum never exceeds mu the first pair of the list is used.
     * {@code linkRecords(...)} sorts the list with {@code orderRecordPairsByWeight} before
     * calling this method.
     * <p>
     * When there are no pairs the upper bound is left unchanged.
     *
     * @param pairs record pairs in the order they should be scanned; may be empty
     * @param fellegiSunterParams provides mu and receives the upper bound
     */
    public void calculateUpperBound(List<LeanRecordPair> pairs, FellegiSunterParameters fellegiSunterParams) {
        if (pairs == null || pairs.isEmpty()) {
            // Nothing to derive a bound from; avoid indexing into an empty list.
            log.debug("No record pairs available; the upper bound is left unchanged.");
            return;
        }
        double sum = 0;
        double probability = 0;
        int index = pairs.size() - 1;
        for (int i = index; i >= 0; i--) {
            probability = pairs.get(i).getComparisonVector().getVectorProbGivenU();
            sum += probability;
            if (sum > fellegiSunterParams.getMu()) {
                break;
            }
            index--;
        }
        // The whole list was scanned without exceeding mu: fall back to the first pair.
        if (index < 0)
            index = 0;
        log.debug("Sum: " + sum + ", mu: " + fellegiSunterParams.getMu() + ", index: " + index + ", weight: "
                + pairs.get(index).getWeight() + ", prob: " + probability);
        fellegiSunterParams.setUpperBound(pairs.get(index).getWeight());
    }

    /**
     * Returns the person link DAO held by this service.
     *
     * @return the DAO last passed to {@code setPersonLinkDao}, or {@code null} if none was set
     */
    public PersonLinkDao getPersonLinkDao() {
        return this.personLinkDao;
    }

    /**
     * Stores the person link DAO for this service.
     *
     * @param personLinkDao the DAO to keep; returned by {@code getPersonLinkDao()}
     */
    public void setPersonLinkDao(PersonLinkDao personLinkDao) {
        this.personLinkDao = personLinkDao;
    }
}