uk.ac.ebi.intact.util.protein.utils.XrefUpdaterUtils.java Source code

Java tutorial

Introduction

Here is the source code for uk.ac.ebi.intact.util.protein.utils.XrefUpdaterUtils.java

Source

/*
 * Copyright (c) 2002 The European Bioinformatics Institute, and others.
 * All rights reserved. Please see the file LICENSE
 * in the root directory of this distribution.
 */
package uk.ac.ebi.intact.util.protein.utils;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import uk.ac.ebi.intact.core.context.DataContext;
import uk.ac.ebi.intact.core.context.IntactContext;
import uk.ac.ebi.intact.core.persistence.dao.CvObjectDao;
import uk.ac.ebi.intact.core.persistence.dao.DaoFactory;
import uk.ac.ebi.intact.core.persistence.dao.ProteinDao;
import uk.ac.ebi.intact.core.persistence.dao.XrefDao;
import uk.ac.ebi.intact.dbupdate.prot.ProteinUpdateProcessor;
import uk.ac.ebi.intact.dbupdate.prot.util.ProteinTools;
import uk.ac.ebi.intact.model.*;
import uk.ac.ebi.intact.model.util.ProteinUtils;
import uk.ac.ebi.intact.uniprot.model.UniprotProtein;
import uk.ac.ebi.intact.uniprot.model.UniprotProteinTranscript;
import uk.ac.ebi.intact.uniprot.model.UniprotXref;
import uk.ac.ebi.intact.util.protein.CvHelper;

import java.util.*;

/**
 * Utilities for updating Xrefs.
 *
 * @author Samuel Kerrien (skerrien@ebi.ac.uk)
 * @version $Id$
 * @since 1.1.2
 */
public final class XrefUpdaterUtils {

    private XrefUpdaterUtils() {
    }

    /**
     * Sets up a logger for that class.
     */
    public static final Log log = LogFactory.getLog(XrefUpdaterUtils.class);

    public static Xref convert(UniprotXref uniprotXref, CvDatabase db) {
        Institution owner = db.getOwner();
        return new InteractorXref(owner, db, uniprotXref.getAccession(), null);
    }

    public static Collection<Xref> convert(Collection<UniprotXref> uniprotXrefs, CvDatabase db) {
        Collection<Xref> xrefs = new ArrayList<Xref>(uniprotXrefs.size());
        for (UniprotXref uniprotXref : uniprotXrefs) {
            xrefs.add(convert(uniprotXref, db));
        }
        return xrefs;
    }

    public static XrefUpdaterReport updateAllProteinTranscriptXrefs(Protein protein,
            UniprotProteinTranscript uniprotTranscript, UniprotProtein uniprot, DataContext context,
            ProteinUpdateProcessor processor) {

        List<Xref> deletedXrefs = new ArrayList<Xref>();

        // UPDATE UNIPROT XREFS
        XrefUpdaterReport reports = updateProteinTranscriptUniprotXrefs(protein, uniprotTranscript, uniprot,
                context, processor);

        // CHECK THAT ALL INTACT XREF STILL EXIST IN UNIPROT, OTHERWISE DELETE THEM
        List<InteractorXref> refs = new ArrayList<InteractorXref>(protein.getXrefs());
        for (InteractorXref xref : refs) {

            // Dont's check uniprot xref as those one have been done earlier and as anyway, the uniprot Xref won't have
            // uniprot xref as intact protein does (the uniprot xref of the intact protein correspond to the primary and
            // secondary ac of the proteins).
            CvDatabase cvDb = xref.getCvDatabase();
            String cvDbMi = cvDb.getMiIdentifier();

            // we escape secondary xrefs
            if (CvDatabase.UNIPROT_MI_REF.equals(cvDbMi) || CvDatabase.INTACT_MI_REF.equals(cvDbMi)
                    || (xref.getCvXrefQualifier() != null
                            && ("intact-secondary".equalsIgnoreCase(xref.getCvXrefQualifier().getShortLabel())
                                    || CvXrefQualifier.SECONDARY_AC
                                            .equalsIgnoreCase(xref.getCvXrefQualifier().getShortLabel())))) {
                continue;
            } else {
                deletedXrefs.add(xref);
                ProteinTools.deleteInteractorXRef(protein, context, xref);
            }
        }

        if (reports == null && !deletedXrefs.isEmpty()) {
            reports = new XrefUpdaterReport(protein, Collections.EMPTY_LIST, deletedXrefs);
        } else if (reports != null && !deletedXrefs.isEmpty()) {
            reports.getRemovedXrefs().addAll(deletedXrefs);
        }

        return reports;
    }

    public static XrefUpdaterReport updateAllXrefs(Protein protein, UniprotProtein uniprotProtein,
            Map<String, String> databaseName2mi, DataContext context, ProteinUpdateProcessor processor,
            TreeSet<InteractorXref> sortedXrefs, TreeSet<UniprotXref> sortedUniprotXrefs) {

        sortedXrefs.clear();
        sortedXrefs.addAll(protein.getXrefs());
        Iterator<InteractorXref> intactIterator = sortedXrefs.iterator();

        sortedUniprotXrefs.clear();
        sortedUniprotXrefs.addAll(uniprotProtein.getCrossReferences());
        Iterator<UniprotXref> uniprotIterator = sortedUniprotXrefs.iterator();

        List<Xref> createdXrefs = new ArrayList<Xref>(uniprotProtein.getCrossReferences().size());
        List<Xref> deletedXrefs = new ArrayList<Xref>(protein.getXrefs().size());

        DaoFactory daoFactory = context.getDaoFactory();
        CvObjectDao<CvDatabase> dbDao = daoFactory.getCvObjectDao(CvDatabase.class);
        XrefDao<InteractorXref> refDao = daoFactory.getXrefDao(InteractorXref.class);

        InteractorXref currentIntact = null;
        UniprotXref currentUniprot = null;
        String db = null;
        CvDatabase cvDatabase = null;

        if (intactIterator.hasNext() && uniprotIterator.hasNext()) {
            currentIntact = intactIterator.next();
            currentUniprot = uniprotIterator.next();
            db = databaseName2mi.get(currentUniprot.getDatabase().toLowerCase());
            cvDatabase = currentIntact.getCvDatabase();

            if (db != null && cvDatabase != null) {
                do {
                    int dbComparator = cvDatabase.getIdentifier().compareTo(db);

                    if (dbComparator == 0) {
                        int acComparator = currentIntact.getPrimaryId().compareTo(currentUniprot.getAccession());

                        if (acComparator == 0) {
                            if (intactIterator.hasNext() && uniprotIterator.hasNext()) {
                                currentIntact = intactIterator.next();
                                currentUniprot = uniprotIterator.next();
                                db = databaseName2mi.get(currentUniprot.getDatabase().toLowerCase());
                                cvDatabase = currentIntact.getCvDatabase();
                            } else {
                                currentIntact = null;
                                currentUniprot = null;
                                db = null;
                                cvDatabase = null;
                            }
                        } else if (acComparator < 0) {
                            //intact has no match in uniprot
                            if (!CvDatabase.UNIPROT_MI_REF.equalsIgnoreCase(cvDatabase.getIdentifier())
                                    && !CvDatabase.INTACT_MI_REF
                                            .equalsIgnoreCase(currentIntact.getCvDatabase().getIdentifier())
                                    && !(currentIntact.getCvXrefQualifier() != null && ("intact-secondary"
                                            .equalsIgnoreCase(currentIntact.getCvXrefQualifier().getShortLabel())
                                            || CvXrefQualifier.SECONDARY_AC.equalsIgnoreCase(
                                                    currentIntact.getCvXrefQualifier().getShortLabel())))) {
                                deletedXrefs.add(currentIntact);
                                protein.removeXref(currentIntact);

                                refDao.delete(currentIntact);
                            }

                            if (intactIterator.hasNext()) {
                                currentIntact = intactIterator.next();
                                cvDatabase = currentIntact.getCvDatabase();
                            } else {
                                currentIntact = null;
                                cvDatabase = null;
                            }
                        } else {
                            //uniprot has no match in intact
                            CvDatabase cvDb = cvDatabase;

                            InteractorXref newXref = new InteractorXref(
                                    IntactContext.getCurrentInstance().getInstitution(), cvDb,
                                    currentUniprot.getAccession(), null);
                            protein.addXref(newXref);

                            refDao.persist(newXref);

                            createdXrefs.add(newXref);

                            if (uniprotIterator.hasNext()) {
                                currentUniprot = uniprotIterator.next();
                                db = databaseName2mi.get(currentUniprot.getDatabase().toLowerCase());
                            } else {
                                currentUniprot = null;
                                db = null;
                            }
                        }
                    } else if (dbComparator < 0) {
                        //intact has no match in uniprot
                        if (!CvDatabase.UNIPROT_MI_REF
                                .equalsIgnoreCase(currentIntact.getCvDatabase().getIdentifier())
                                && !CvDatabase.INTACT_MI_REF
                                        .equalsIgnoreCase(currentIntact.getCvDatabase().getIdentifier())
                                && !(currentIntact.getCvXrefQualifier() != null && ("intact-secondary"
                                        .equalsIgnoreCase(currentIntact.getCvXrefQualifier().getShortLabel())
                                        || CvXrefQualifier.SECONDARY_AC.equalsIgnoreCase(
                                                currentIntact.getCvXrefQualifier().getShortLabel())))) {
                            deletedXrefs.add(currentIntact);
                            protein.removeXref(currentIntact);

                            refDao.delete(currentIntact);
                        }
                        if (intactIterator.hasNext()) {
                            currentIntact = intactIterator.next();
                            cvDatabase = currentIntact.getCvDatabase();
                        } else {
                            currentIntact = null;
                            cvDatabase = null;
                        }
                    } else {
                        //uniprot has no match in intact
                        CvDatabase cvDb = dbDao.getByIdentifier(db);

                        if (cvDb != null) {

                            InteractorXref newXref = new InteractorXref(
                                    IntactContext.getCurrentInstance().getInstitution(), cvDb,
                                    currentUniprot.getAccession(), null);
                            protein.addXref(newXref);

                            refDao.persist(newXref);

                            createdXrefs.add(newXref);

                        } else {
                            log.debug("We are not copying across xref to " + db);
                        }

                        if (uniprotIterator.hasNext()) {
                            currentUniprot = uniprotIterator.next();
                            db = databaseName2mi.get(currentUniprot.getDatabase().toLowerCase());
                        } else {
                            currentUniprot = null;
                            db = null;
                        }
                    }
                } while (currentIntact != null && currentUniprot != null && db != null && cvDatabase != null);
            }
        }

        if (currentIntact != null || intactIterator.hasNext()) {
            if (currentIntact == null) {
                currentIntact = intactIterator.next();
                cvDatabase = currentIntact.getCvDatabase();
            }

            do {
                //intact has no match in uniprot
                if (cvDatabase == null || (cvDatabase != null
                        && !CvDatabase.UNIPROT_MI_REF.equalsIgnoreCase(cvDatabase.getIdentifier())
                        && !CvDatabase.INTACT_MI_REF.equalsIgnoreCase(cvDatabase.getIdentifier())
                        && !(currentIntact.getCvXrefQualifier() != null && ("intact-secondary"
                                .equalsIgnoreCase(currentIntact.getCvXrefQualifier().getShortLabel())
                                || CvXrefQualifier.SECONDARY_AC
                                        .equalsIgnoreCase(currentIntact.getCvXrefQualifier().getShortLabel()))))) {
                    deletedXrefs.add(currentIntact);

                    protein.removeXref(currentIntact);

                    refDao.delete(currentIntact);
                }

                if (intactIterator.hasNext()) {
                    currentIntact = intactIterator.next();
                    cvDatabase = currentIntact.getCvDatabase();
                } else {
                    currentIntact = null;
                    cvDatabase = null;
                }
            } while (currentIntact != null);
        }

        if (currentUniprot != null || uniprotIterator.hasNext()) {
            if (currentUniprot == null) {
                currentUniprot = uniprotIterator.next();
                db = databaseName2mi.get(currentUniprot.getDatabase().toLowerCase());
            }

            if (db != null) {
                do {
                    //uniprot has no match in intact
                    CvDatabase cvDb = dbDao.getByIdentifier(db);

                    if (cvDb != null) {

                        InteractorXref newXref = new InteractorXref(
                                IntactContext.getCurrentInstance().getInstitution(), cvDb,
                                currentUniprot.getAccession(), null);
                        protein.addXref(newXref);

                        refDao.persist(newXref);

                        createdXrefs.add(newXref);

                    } else {
                        log.debug("We are not copying across xref to " + db);
                    }

                    if (uniprotIterator.hasNext()) {
                        currentUniprot = uniprotIterator.next();
                        db = databaseName2mi.get(currentUniprot.getDatabase().toLowerCase());
                    } else {
                        currentUniprot = null;
                        db = null;
                    }
                } while (currentUniprot != null && db != null);
            }
        }

        // update uniprot xrefs
        XrefUpdaterReport report = updateUniprotXrefs(protein, uniprotProtein, context, processor);

        if (report == null && (!createdXrefs.isEmpty() || !deletedXrefs.isEmpty())) {
            report = new XrefUpdaterReport(protein, createdXrefs, deletedXrefs);

        } else if (report != null && (!createdXrefs.isEmpty() || !deletedXrefs.isEmpty())) {
            report.getAddedXrefs().addAll(createdXrefs);
            report.getRemovedXrefs().addAll(deletedXrefs);
        }

        sortedXrefs.clear();
        sortedUniprotXrefs.clear();

        return report;
    }

    /**
     * Update of the Xref of a protein.
     * <p/>
     * <pre>
     * Algo sketch:
     * 1) select a subset of the xref of the given protein based on the given CvDatabase
     * 2) select the outdated Xref
     * 3) reused them to create new Xref and delete the remaining one. By doing so we don't waste ACs
     * </pre>
     *
     * @param protein  the protein what we want to update the Xrefs
     * @param database the target database
     * @param newXrefs the new set of xrefs
     *
     * @return true if the protein has been updated, otherwise false
     */
    public static XrefUpdaterReport updateXrefCollection(Protein protein, CvDatabase database,
            Collection<Xref> newXrefs, DataContext context, ProteinUpdateProcessor processor) {

        if (protein == null) {
            throw new IllegalArgumentException("You must give a non null protein.");
        }

        if (database == null) {
            throw new IllegalArgumentException("You must give a non null database (cvDatabase)).");
        }

        if (newXrefs == null) {
            throw new IllegalArgumentException("You must give a non null collection of xrefs.");
        }

        boolean updated = false;
        Collection<Xref> currentXrefs = null;

        // select only the xref of the given database
        for (Xref xref : protein.getXrefs()) {
            if (database.equals(xref.getCvDatabase())) {
                if (currentXrefs == null) {
                    currentXrefs = new ArrayList<Xref>();
                }
                currentXrefs.add(xref);
            }
        }

        if (currentXrefs == null) {
            currentXrefs = Collections.EMPTY_LIST;
        }

        Collection<Xref> toDelete = CollectionUtils.subtract(currentXrefs, newXrefs); // current minus new
        if (log.isDebugEnabled()) {
            log.debug(toDelete.size() + " xrefs to delete: " + toDelete);
        }
        Collection<Xref> toCreate = CollectionUtils.subtract(newXrefs, currentXrefs);
        if (log.isDebugEnabled()) {
            log.debug(toCreate.size() + " xrefs to create: " + toCreate);
        }

        for (Xref xref : toCreate) {
            addNewXref(protein, xref, context);
        }

        for (Xref xref : toDelete) {
            // delete remaining outdated xrefs
            if (log.isDebugEnabled()) {
                log.debug("DELETING: " + xref);
            }
            Collection<InteractorXref> refs = new ArrayList(protein.getXrefs());
            Iterator<InteractorXref> protXrefsIterator = refs.iterator();
            while (protXrefsIterator.hasNext()) {
                InteractorXref interactorXref = protXrefsIterator.next();

                if (interactorXref.getPrimaryId().equals(xref.getPrimaryId())) {
                    ProteinTools.deleteInteractorXRef(protein, context, interactorXref);
                }

            }
        }

        return new XrefUpdaterReport(protein, toCreate.toArray(new Xref[toCreate.size()]),
                toDelete.toArray(new Xref[toDelete.size()]));
    }

    private static boolean addNewXref(AnnotatedObject current, final Xref xref, DataContext context) {
        // Make sure the xref does not yet exist in the object
        if (current.getXrefs().contains(xref)) {
            log.debug("SKIPPED: [" + xref + "] already exists");
            return false; // quit
        }

        // add the xref to the AnnotatedObject
        context.getDaoFactory().getXrefDao().persist(xref);
        current.addXref(xref);

        return true;
    }

    public static InteractorXref getOlderUniprotIdentity(List<InteractorXref> uniprotIdentities) {
        if (uniprotIdentities.isEmpty()) {
            return null;
        }
        Iterator<InteractorXref> iterator = uniprotIdentities.iterator();

        InteractorXref older = iterator.next();
        while (iterator.hasNext()) {
            InteractorXref ref = iterator.next();

            if (ref.getCreated().before(older.getCreated())) {
                older = ref;
            }
        }

        return older;
    }

    public static XrefUpdaterReport fixDuplicateOfSameUniprotIdentity(List<InteractorXref> uniprotIdentities,
            Protein prot, DataContext context, ProteinUpdateProcessor processor) {
        InteractorXref original = getOlderUniprotIdentity(uniprotIdentities);

        ProteinDao proteinDao = context.getDaoFactory().getProteinDao();
        Collection<InteractorXref> deletedDuplicate = new ArrayList<InteractorXref>();

        if (original != null) {
            for (InteractorXref ref : uniprotIdentities) {
                if (ref.getPrimaryId() != null && ref.getPrimaryId().equalsIgnoreCase(original.getPrimaryId())) {
                    deletedDuplicate.add(ref);
                    ProteinTools.deleteInteractorXRef(prot, context, ref);
                } else if (ref.getPrimaryId() == null && original.getPrimaryId() == null) {
                    deletedDuplicate.add(ref);
                    ProteinTools.deleteInteractorXRef(prot, context, ref);
                }
            }
        }

        XrefUpdaterReport report = new XrefUpdaterReport(prot, new Xref[] {},
                deletedDuplicate.toArray(new Xref[deletedDuplicate.size()]));

        proteinDao.update((ProteinImpl) prot);

        return report;
    }

    public static XrefUpdaterReport updateUniprotXrefs(Protein protein, UniprotProtein uniprotProtein,
            DataContext context, ProteinUpdateProcessor processor) {
        XrefUpdaterReport reports = null;

        List<Xref> deletedXrefs = new ArrayList<Xref>(protein.getXrefs().size());
        List<Xref> createdXrefs = new ArrayList<Xref>(uniprotProtein.getCrossReferences().size());

        List<InteractorXref> uniprotIdentities = ProteinTools.getAllUniprotIdentities(protein);
        if (uniprotIdentities.size() > 1) {
            XrefUpdaterReport intermediary = fixDuplicateOfSameUniprotIdentity(uniprotIdentities, protein, context,
                    processor);
            if (intermediary != null) {
                reports = intermediary;
            }
        }

        CvDatabase uniprot;
        CvXrefQualifier identity;
        CvXrefQualifier secondary = null;
        String dbRelease = uniprotProtein.getReleaseVersion();

        InteractorXref uniprotXref = ProteinUtils.getUniprotXref(protein);
        Institution owner = CvHelper.getInstitution();

        if (uniprotXref != null) {
            uniprot = uniprotXref.getCvDatabase();
            identity = uniprotXref.getCvXrefQualifier();
        } else {
            uniprot = CvHelper.getDatabaseByMi(CvDatabase.UNIPROT_MI_REF);
            identity = CvHelper.getQualifierByMi(CvXrefQualifier.IDENTITY_MI_REF);

            uniprotXref = new InteractorXref(owner, uniprot, uniprotProtein.getPrimaryAc(), null, dbRelease,
                    identity);
            protein.addXref(uniprotXref);

            context.getDaoFactory().getXrefDao(InteractorXref.class).persist(uniprotXref);

            createdXrefs.add(uniprotXref);
        }

        log.debug("Found " + uniprotProtein.getSecondaryAcs().size() + " secondary ACs");
        // update secondary xrefs
        List<String> secondaryUniprot = new ArrayList<String>(uniprotProtein.getSecondaryAcs());
        for (InteractorXref xref : protein.getXrefs()) {
            if (xref.getCvDatabase() != null
                    && xref.getCvDatabase().getIdentifier().equals(CvDatabase.UNIPROT_MI_REF)) {
                if (xref.getCvXrefQualifier() != null
                        && xref.getCvXrefQualifier().getIdentifier().equals(CvXrefQualifier.SECONDARY_AC_MI_REF)) {
                    secondary = xref.getCvXrefQualifier();

                    // secondary xref already exists
                    if (secondaryUniprot.contains(xref.getPrimaryId())) {
                        secondaryUniprot.remove(xref.getPrimaryId());
                    }
                    // secondary xref does not exist, we dele it
                    else {
                        deletedXrefs.add(xref);
                        ProteinTools.deleteInteractorXRef(protein, context, xref);
                    }
                }
            }
        }

        if (secondary == null) {
            secondary = CvHelper.getQualifierByMi(CvXrefQualifier.SECONDARY_AC_MI_REF);
        }
        // create missing secondary xrefs
        for (String secondaryToAdd : secondaryUniprot) {
            InteractorXref interactorXref = new InteractorXref(owner, uniprot, secondaryToAdd, null, dbRelease,
                    secondary);
            protein.addXref(interactorXref);

            context.getDaoFactory().getXrefDao(InteractorXref.class).persist(interactorXref);
            createdXrefs.add(interactorXref);
        }

        if (reports == null && (!createdXrefs.isEmpty() || !deletedXrefs.isEmpty())) {
            reports = new XrefUpdaterReport(protein, createdXrefs, deletedXrefs);
        } else if (reports != null && (!createdXrefs.isEmpty() || !deletedXrefs.isEmpty())) {
            reports.getAddedXrefs().addAll(createdXrefs);
            reports.getRemovedXrefs().addAll(deletedXrefs);
        }
        return reports;
    }

    public static XrefUpdaterReport updateProteinTranscriptUniprotXrefs(Protein intactTranscript,
            UniprotProteinTranscript uniprotProteinTranscript, UniprotProtein uniprotProtein, DataContext context,
            ProteinUpdateProcessor processor) {
        XrefUpdaterReport reports = null;

        List<Xref> deletedXrefs = new ArrayList<Xref>(intactTranscript.getXrefs().size());
        List<Xref> createdXrefs = new ArrayList<Xref>(uniprotProteinTranscript.getSecondaryAcs().size() + 1);

        List<InteractorXref> uniprotIdentities = ProteinTools.getAllUniprotIdentities(intactTranscript);
        if (uniprotIdentities.size() > 1) {
            XrefUpdaterReport intermediary = fixDuplicateOfSameUniprotIdentity(uniprotIdentities, intactTranscript,
                    context, processor);
            if (intermediary != null) {
                reports = intermediary;
            }
        }

        CvDatabase uniprot;
        CvXrefQualifier identity;
        CvXrefQualifier secondary = null;
        String dbRelease = uniprotProtein.getReleaseVersion();

        InteractorXref uniprotXref = ProteinUtils.getUniprotXref(intactTranscript);
        Institution owner = CvHelper.getInstitution();

        if (uniprotXref != null) {
            uniprot = uniprotXref.getCvDatabase();
            identity = uniprotXref.getCvXrefQualifier();
        } else {
            uniprot = CvHelper.getDatabaseByMi(CvDatabase.UNIPROT_MI_REF);
            identity = CvHelper.getQualifierByMi(CvXrefQualifier.IDENTITY_MI_REF);

            uniprotXref = new InteractorXref(owner, uniprot, uniprotProteinTranscript.getPrimaryAc(), null,
                    dbRelease, identity);
            intactTranscript.addXref(uniprotXref);

            context.getDaoFactory().getXrefDao(InteractorXref.class).persist(uniprotXref);

            createdXrefs.add(uniprotXref);
        }

        log.debug("Found " + uniprotProteinTranscript.getSecondaryAcs().size() + " secondary ACs");
        // update secondary xrefs
        List<String> secondaryUniprot = new ArrayList<String>(uniprotProteinTranscript.getSecondaryAcs());
        List<InteractorXref> refs = new ArrayList(intactTranscript.getXrefs());
        for (InteractorXref xref : refs) {
            if (xref.getCvDatabase() != null
                    && xref.getCvDatabase().getIdentifier().equals(CvDatabase.UNIPROT_MI_REF)) {
                if (xref.getCvXrefQualifier() != null
                        && xref.getCvXrefQualifier().getIdentifier().equals(CvXrefQualifier.SECONDARY_AC_MI_REF)) {
                    secondary = xref.getCvXrefQualifier();

                    // secondary xref already exists
                    if (secondaryUniprot.contains(xref.getPrimaryId())) {
                        secondaryUniprot.remove(xref.getPrimaryId());
                    }
                    // secondary xref does not exist, we dele it
                    else {
                        deletedXrefs.add(xref);
                        ProteinTools.deleteInteractorXRef(intactTranscript, context, xref);
                    }
                }
            }
        }

        if (secondary == null) {
            secondary = CvHelper.getQualifierByMi(CvXrefQualifier.SECONDARY_AC_MI_REF);
        }
        // create missing secondary xrefs
        for (String secondaryToAdd : secondaryUniprot) {
            InteractorXref interactorXref = new InteractorXref(owner, uniprot, secondaryToAdd, null, dbRelease,
                    secondary);
            intactTranscript.addXref(interactorXref);

            context.getDaoFactory().getXrefDao(InteractorXref.class).persist(interactorXref);
            createdXrefs.add(interactorXref);
        }

        if (reports == null && (!createdXrefs.isEmpty() || !deletedXrefs.isEmpty())) {
            reports = new XrefUpdaterReport(intactTranscript, createdXrefs, deletedXrefs);
        } else if (reports != null && (!createdXrefs.isEmpty() || !deletedXrefs.isEmpty())) {
            reports.getAddedXrefs().addAll(createdXrefs);
            reports.getRemovedXrefs().addAll(deletedXrefs);
        }
        return reports;
    }

    public static Map<String, Collection<UniprotXref>> clusterByDatabaseName(Collection<UniprotXref> xrefs) {

        Map<String, Collection<UniprotXref>> xrefCluster = new HashMap<String, Collection<UniprotXref>>(
                xrefs.size());

        for (UniprotXref cr : xrefs) {
            Collection<UniprotXref> c = xrefCluster.get(cr.getDatabase());
            if (c == null) {
                // not found, add an entry
                c = new ArrayList<UniprotXref>();
                xrefCluster.put(cr.getDatabase(), c);
            }
            c.add(cr);
        }

        return xrefCluster;
    }

}