gov.nih.nci.caarray.application.translation.magetab.SdrfTranslator.java Source code

Java tutorial

Introduction

Here is the source code for gov.nih.nci.caarray.application.translation.magetab.SdrfTranslator.java

Source

//======================================================================================
// Copyright 5AM Solutions Inc, Yale University
//
// Distributed under the OSI-approved BSD 3-Clause License.
// See http://ncip.github.com/caarray/LICENSE.txt for details.
//======================================================================================
package gov.nih.nci.caarray.application.translation.magetab;

import edu.georgetown.pir.Organism;
import gov.nih.nci.caarray.application.util.MessageTemplates;
import gov.nih.nci.caarray.application.vocabulary.VocabularyService;
import gov.nih.nci.caarray.dao.CaArrayDaoFactory;
import gov.nih.nci.caarray.domain.AbstractCaArrayEntity;
import gov.nih.nci.caarray.domain.AbstractCaArrayObject;
import gov.nih.nci.caarray.domain.array.Array;
import gov.nih.nci.caarray.domain.array.ArrayDesign;
import gov.nih.nci.caarray.domain.contact.Organization;
import gov.nih.nci.caarray.domain.data.AbstractArrayData;
import gov.nih.nci.caarray.domain.data.DerivedArrayData;
import gov.nih.nci.caarray.domain.data.Image;
import gov.nih.nci.caarray.domain.data.RawArrayData;
import gov.nih.nci.caarray.domain.file.CaArrayFile;
import gov.nih.nci.caarray.domain.file.CaArrayFileSet;
import gov.nih.nci.caarray.domain.file.FileStatus;
import gov.nih.nci.caarray.domain.file.FileType;
import gov.nih.nci.caarray.domain.hybridization.Hybridization;
import gov.nih.nci.caarray.domain.project.AbstractFactorValue;
import gov.nih.nci.caarray.domain.project.Experiment;
import gov.nih.nci.caarray.domain.project.ExperimentOntology;
import gov.nih.nci.caarray.domain.project.ExperimentOntologyCategory;
import gov.nih.nci.caarray.domain.project.Factor;
import gov.nih.nci.caarray.domain.project.MeasurementFactorValue;
import gov.nih.nci.caarray.domain.project.Project;
import gov.nih.nci.caarray.domain.project.TermBasedFactorValue;
import gov.nih.nci.caarray.domain.project.UserDefinedFactorValue;
import gov.nih.nci.caarray.domain.protocol.AbstractParameterValue;
import gov.nih.nci.caarray.domain.protocol.MeasurementParameterValue;
import gov.nih.nci.caarray.domain.protocol.Parameter;
import gov.nih.nci.caarray.domain.protocol.Protocol;
import gov.nih.nci.caarray.domain.protocol.ProtocolApplication;
import gov.nih.nci.caarray.domain.protocol.ProtocolTypeAssociation;
import gov.nih.nci.caarray.domain.protocol.TermBasedParameterValue;
import gov.nih.nci.caarray.domain.protocol.UserDefinedParameterValue;
import gov.nih.nci.caarray.domain.sample.AbstractBioMaterial;
import gov.nih.nci.caarray.domain.sample.AbstractCharacteristic;
import gov.nih.nci.caarray.domain.sample.Extract;
import gov.nih.nci.caarray.domain.sample.LabeledExtract;
import gov.nih.nci.caarray.domain.sample.MeasurementCharacteristic;
import gov.nih.nci.caarray.domain.sample.Sample;
import gov.nih.nci.caarray.domain.sample.Source;
import gov.nih.nci.caarray.domain.sample.TermBasedCharacteristic;
import gov.nih.nci.caarray.domain.sample.UserDefinedCharacteristic;
import gov.nih.nci.caarray.domain.vocabulary.Category;
import gov.nih.nci.caarray.domain.vocabulary.Term;
import gov.nih.nci.caarray.domain.vocabulary.TermSource;
import gov.nih.nci.caarray.magetab.MageTabDocumentSet;
import gov.nih.nci.caarray.magetab.OntologyTerm;
import gov.nih.nci.caarray.magetab.sdrf.AbstractSampleDataRelationshipNode;
import gov.nih.nci.caarray.magetab.sdrf.Characteristic;
import gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataFile;
import gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataMatrixFile;
import gov.nih.nci.caarray.magetab.sdrf.Provider;
import gov.nih.nci.caarray.magetab.sdrf.SdrfColumnType;
import gov.nih.nci.caarray.magetab.sdrf.SdrfDocument;
import gov.nih.nci.caarray.magetab.sdrf.SdrfNodeType;
import gov.nih.nci.caarray.validation.ValidationMessage.Type;

import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections.map.MultiKeyMap;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.google.common.base.Predicate;
import com.google.common.collect.Collections2;

/**
 * Translates entities in SDRF documents into caArray entities.
 */
@SuppressWarnings({ "PMD.CyclomaticComplexity", "PMD.TooManyMethods", "PMD.ExcessiveClassLength" })
final class SdrfTranslator extends AbstractTranslator {

    private static final Logger LOG = Logger.getLogger(SdrfTranslator.class);

    // Name prefixes for generated biomaterials; NOTE(review): their usage is outside the visible part of the file.
    private static final String GENERATED_SAMPLE_PREFIX = "GeneratedSample.";
    private static final String GENERATED_EXTRACT_PREFIX = "GeneratedExtract.";
    private static final String GENERATED_LABELED_EXTRACT_PREFIX = "GeneratedLabeledExtract.";

    // Maps each SDRF node to the caArray entity it was translated into.
    private final Map<AbstractSampleDataRelationshipNode, AbstractCaArrayEntity> nodeTranslations = new HashMap<AbstractSampleDataRelationshipNode, AbstractCaArrayEntity>();
    // NOTE(review): presumably tracks which nodes have already been linked; usage is not visible here -- confirm.
    private final Map<AbstractSampleDataRelationshipNode, Boolean> isNodeLinked = new HashMap<AbstractSampleDataRelationshipNode, Boolean>();
    // Entities collected during translation; they are added to the matching investigation in translateSdrf.
    private final List<Source> allSources = new ArrayList<Source>();
    private final List<Sample> allSamples = new ArrayList<Sample>();
    private final List<Extract> allExtracts = new ArrayList<Extract>();
    private final List<LabeledExtract> allLabeledExtracts = new ArrayList<LabeledExtract>();
    private final List<Hybridization> allHybridizations = new ArrayList<Hybridization>();
    // NOTE(review): usage is outside the visible part of the file.
    private final Map<String, AbstractCaArrayEntity> generatedNodes = new HashMap<String, AbstractCaArrayEntity>();
    // Protocols first seen during this import (found neither here nor in the database), keyed by name and source.
    private final Map<ProtocolKey, Protocol> importedProtocolMap = new HashMap<ProtocolKey, Protocol>();
    // Organisms created by this translator, keyed by the term they were created from.
    private final Map<Term, Organism> termToOrganism = new HashMap<Term, Organism>();
    private final VocabularyService vocabularyService;
    // Parameters created by this translator, keyed by (parameter name, protocol).
    private final MultiKeyMap paramMap = new MultiKeyMap();
    // Experiment of the project the file set belongs to; null when that project could not be retrieved.
    private final Experiment experiment;
    private final TermTranslator termTranslator;

    /**
     * Creates a translator for the given document set and file set.
     *
     * @param documentSet the MAGE-TAB document set to translate
     * @param fileSet the files being imported; its project id identifies the target project
     * @param translationResult the result object shared with the other translators
     * @param daoFactory factory used for all data access
     * @param vocabularyService service used to look up term sources, terms and organisms
     */
    SdrfTranslator(MageTabDocumentSet documentSet, CaArrayFileSet fileSet,
            MageTabTranslationResult translationResult, CaArrayDaoFactory daoFactory,
            VocabularyService vocabularyService) {
        super(documentSet, fileSet, translationResult, daoFactory);
        this.vocabularyService = vocabularyService;

        // The experiment is taken from the project owning the file set; it stays null when no project is found.
        final Project owningProject = getDaoFactory().getSearchDao().retrieve(Project.class,
                getFileSet().getProjectId());
        this.experiment = (owningProject == null) ? null : owningProject.getExperiment();

        this.termTranslator = new TermTranslator(documentSet, translationResult, vocabularyService, daoFactory);
    }

    /**
     * Translates every SDRF document of the document set, then removes the organism terms from the
     * translation result.
     */
    @Override
    void translate() {
        for (final SdrfDocument sdrf : getDocumentSet().getSdrfDocuments()) {
            translateSdrf(sdrf);
        }
        // Organism terms are dropped from the result once all documents have been processed.
        getTranslationResult().removeOrganismTerms();
    }

    /**
     * Validates all SDRF documents of the document set and the file references they contain.
     * A document set without SDRF documents is a data-only import, for which there is nothing
     * MAGE-TAB specific to validate.
     */
    void validate() {
        if (!getDocumentSet().getSdrfDocuments().isEmpty()) {
            // Seeded with the ids already persisted for the experiment; each document adds its own ids.
            final Set<String> knownExternalIds = getExistingExternalIdsForCurrentExperiment();
            for (final SdrfDocument sdrf : getDocumentSet().getSdrfDocuments()) {
                validateSdrf(sdrf, knownExternalIds);
            }
            validateFileReferences();
        }
    }

    /**
     * Checks that every raw, derived or data matrix file in the file set is referenced from an SDRF
     * document through the column that matches its file type, and records a validation error on the
     * file otherwise.
     */
    private void validateFileReferences() {
        // Copy the referenced names into hash sets so the per-file membership tests below are constant time
        // instead of a linear scan of each list for every file.
        final Set<String> rawNames = new HashSet<String>(getDocumentSet().getSdrfReferencedRawFileNames());
        final Set<String> derivedNames = new HashSet<String>(getDocumentSet().getSdrfReferencedDerivedFileNames());
        final Set<String> matrixNames = new HashSet<String>(getDocumentSet().getSdrfReferencedDataMatrixFileNames());

        for (final CaArrayFile file : getFileSet().getFiles()) {
            final FileType fileType = file.getFileType();
            final String fileName = file.getName();
            final boolean isRaw = fileType.isRawArrayData();
            final boolean isDerived = fileType.isDerivedArrayData();
            final boolean isMatrix = fileType.isDataMatrix();
            final boolean referencedAsRaw = rawNames.contains(fileName);
            final boolean referencedAsDerived = derivedNames.contains(fileName);
            final boolean referencedAsMatrix = matrixNames.contains(fileName);
            final boolean referencedAsAny = referencedAsRaw || referencedAsDerived || referencedAsMatrix;

            if (isMatrix) {
                // Matrix files are only ever checked against the matrix columns.
                if (!referencedAsMatrix) {
                    addFileReferenceError(file, referencedAsAny,
                            SdrfColumnType.ARRAY_DATA_MATRIX_FILE.getDisplayName() + " or "
                                    + SdrfColumnType.DERIVED_ARRAY_DATA_MATRIX_FILE.getDisplayName());
                }
            } else if (isRaw && !referencedAsRaw) {
                addFileReferenceError(file, referencedAsAny, SdrfColumnType.ARRAY_DATA_FILE.getDisplayName());
            } else if (isDerived && !referencedAsDerived) {
                addFileReferenceError(file, referencedAsAny,
                        SdrfColumnType.DERIVED_ARRAY_DATA_FILE.getDisplayName());
            }
        }
    }

    /**
     * Records a validation error against the given file about its SDRF reference.
     *
     * @param caArrayFile the file the error is reported for
     * @param referencedAsAny whether the file is referenced from some SDRF data file column
     * @param correctColumn display name of the column(s) the file should be referenced from
     */
    private void addFileReferenceError(CaArrayFile caArrayFile, boolean referencedAsAny, String correctColumn) {
        final String text;
        if (referencedAsAny) {
            // Referenced, but through a column that does not match the file type.
            text = "This file is not correctly referenced from an SDRF file. " + "It should be referenced using an "
                    + correctColumn + " column";
        } else {
            text = "This data file is not referenced from an SDRF file.";
        }
        getDocumentSet().getValidationResult().addMessage(caArrayFile.getName(), Type.ERROR, text);
    }

    /**
     * Validates a single SDRF document: first its array designs, then its biomaterial characteristics.
     *
     * @param document the SDRF document to validate
     * @param externalIds external ids seen so far; ids found in this document are added to it
     */
    private void validateSdrf(SdrfDocument document, Set<String> externalIds) {
        this.validateArrayDesigns(document);
        this.validateSamples(document, externalIds);
    }

    /**
     * Runs the external id and organism term source checks on every characteristic, with a non-null
     * category, of every biomaterial in the document.
     *
     * @param document the SDRF document whose biomaterials are checked
     * @param externalIds external ids seen so far; updated by the duplicate check
     */
    private void validateSamples(SdrfDocument document, Set<String> externalIds) {
        for (final gov.nih.nci.caarray.magetab.sdrf.AbstractBioMaterial sdrfBiomaterial : document
                .getAllBiomaterials()) {
            for (final Characteristic candidate : sdrfBiomaterial.getCharacteristics()) {
                final String categoryName = candidate.getCategory();
                if (categoryName == null) {
                    // Characteristics without a category are not checked.
                    continue;
                }
                checkForDuplicateExternalIds(document, externalIds, candidate, categoryName);
                checkForIncorrectOrganismTermSource(document, candidate, categoryName);
            }
        }
    }

    /**
     * Adds an error to the document when an Organism characteristic names a term source other than
     * the NCBI ontology. A characteristic without a term, or whose term has no term source, is
     * accepted.
     *
     * @param document the SDRF document that receives the error message
     * @param sdrfCharacteristic the characteristic being checked
     * @param category the (non-null) category of the characteristic
     */
    private void checkForIncorrectOrganismTermSource(SdrfDocument document, final Characteristic sdrfCharacteristic,
            final String category) {
        if (!ExperimentOntologyCategory.ORGANISM.getCategoryName().equalsIgnoreCase(category)) {
            return;
        }
        // A characteristic may carry a plain value and no term at all (the translation code checks for
        // that too), so the term must not be dereferenced blindly.
        if (sdrfCharacteristic.getTerm() == null || sdrfCharacteristic.getTerm().getTermSource() == null) {
            return;
        }
        final String termSourceName = sdrfCharacteristic.getTerm().getTermSource().getName();
        final String ncbiName = ExperimentOntology.NCBI.getOntologyName();
        // Compare from the constant side so an unnamed term source is reported instead of causing an NPE.
        if (!ncbiName.equals(termSourceName)) {
            document.addErrorMessage("The Characteristics [" + category + "] associated Term Source '"
                    + termSourceName + "' is invalid.  It must be '"
                    + ncbiName + "', or the Term Source should be omitted"
                    + ", so the system can then auto-assign the " + ncbiName
                    + " Term Source.");
        }
    }

    /**
     * Adds a warning to the document when an external id characteristic repeats a value that is
     * already present in {@code externalIds}; otherwise the value is recorded in that set.
     * The category is compared case-insensitively with all whitespace removed.
     *
     * @param document the SDRF document that receives the warning
     * @param externalIds external ids seen so far; new values are added to it
     * @param sdrfCharacteristic the characteristic being checked
     * @param category the (non-null) category of the characteristic
     */
    private void checkForDuplicateExternalIds(SdrfDocument document, Set<String> externalIds,
            final Characteristic sdrfCharacteristic, final String category) {
        final String compactCategory = category.replaceAll("\\s", "");
        final boolean isExternalId = ExperimentOntologyCategory.EXTERNAL_SAMPLE_ID.getCategoryName()
                .equalsIgnoreCase(compactCategory)
                || ExperimentOntologyCategory.EXTERNAL_ID.getCategoryName().equalsIgnoreCase(compactCategory);
        if (!isExternalId || StringUtils.isEmpty(sdrfCharacteristic.getValue())) {
            return;
        }
        // Set.add returns false when the value was already present, i.e. the id is a duplicate.
        if (!externalIds.add(sdrfCharacteristic.getValue())) {
            document.addWarningMessage("[" + category + "] value '" + sdrfCharacteristic.getValue()
                    + "' is referenced multiple times (" + category + " must be unique). "
                    + "Existing value will be reused.");
        }
    }

    /**
     * Collects the external ids of the biomaterials already persisted for the project of the
     * current file set.
     *
     * @return the set of non-null external ids found
     * @throws IllegalStateException if two persisted biomaterials share the same external id
     */
    private Set<String> getExistingExternalIdsForCurrentExperiment() {
        final Set<String> collectedIds = new HashSet<String>();
        for (final AbstractBioMaterial persisted : getProjectDao()
                .getUnfilteredBiomaterialsForProject(getFileSet().getProjectId())) {
            if (persisted.getExternalId() == null) {
                continue;
            }
            final boolean firstOccurrence = collectedIds.add(persisted.getExternalId());
            if (!firstOccurrence) {
                throw new IllegalStateException("System contains samples with duplicate external sample id " + "("
                        + persisted.getExternalId() + ") already. Unable to continue, please correct"
                        + " existing samples and try again.");
            }
        }
        return collectedIds;
    }

    /**
     * Checks each array design referenced by the document: it must be found by a query-by-example on
     * its LSID, and, when the experiment already has array designs, its name must be among them.
     * Errors are added to the document.
     *
     * @param document the SDRF document whose array designs are validated
     */
    private void validateArrayDesigns(SdrfDocument document) {
        final Set<String> experimentDesignNames = new HashSet<String>();
        if (this.experiment != null) {
            for (final ArrayDesign associatedDesign : this.experiment.getArrayDesigns()) {
                experimentDesignNames.add(associatedDesign.getName());
            }
        }

        for (final gov.nih.nci.caarray.magetab.sdrf.ArrayDesign sdrfDesign : document.getAllArrayDesigns()) {
            final String referencedName = sdrfDesign.getValue();
            final ArrayDesign example = new ArrayDesign();
            example.setLsidForEntity(referencedName);
            final List<ArrayDesign> matches = getDaoFactory().getArrayDao().queryEntityByExample(example);
            final boolean foundInSystem = matches != null && !matches.isEmpty();

            final String nameToCheck;
            if (foundInSystem) {
                // Use the name of the first matching design for the experiment association check.
                nameToCheck = matches.get(0).getName();
            } else {
                document.addErrorMessage(String.format(
                        MessageTemplates.NON_EXISTING_ARRAY_DESIGN_ERROR_MESSAGE_TEMPLATE, referencedName));
                nameToCheck = referencedName;
            }

            // The association check is skipped when the experiment has no array designs at all.
            final boolean experimentHasDesigns = !experimentDesignNames.isEmpty();
            if (experimentHasDesigns && !experimentDesignNames.contains(nameToCheck)) {
                document.addErrorMessage(String.format(
                        MessageTemplates.ARRAY_DESIGN_NOT_ASSOCIATED_WITH_EXPERIMENT_ERROR_MESSAGE_TEMPLATE,
                        nameToCheck));
            }
        }
    }

    /**
     * Translates one SDRF document: converts its nodes to entities, links them, and attaches the
     * collected entities to every translated investigation whose title equals the title of the
     * document's IDF investigation.
     *
     * @param document the SDRF document to translate
     */
    private void translateSdrf(SdrfDocument document) {
        translateNodesToEntities(document);
        linkNodes(document);

        // An SDRF document may have no IDF document attached; then there is no investigation title to
        // match against and nothing is attached.
        if (document.getIdfDocument() == null) {
            return;
        }

        final String idfTitle = document.getIdfDocument().getInvestigation().getTitle();
        for (final Experiment candidate : getTranslationResult().getInvestigations()) {
            if (!idfTitle.equals(candidate.getTitle())) {
                continue;
            }
            // Fill in missing ancestors from the bottom up before attaching everything.
            addImplicitExtracts();
            addImplicitSamples();
            addImplicitSources();
            candidate.getSources().addAll(this.allSources);
            candidate.getSamples().addAll(this.allSamples);
            candidate.getExtracts().addAll(this.allExtracts);
            candidate.getLabeledExtracts().addAll(this.allLabeledExtracts);
            candidate.getHybridizations().addAll(this.allHybridizations);
        }
    }

    /**
     * Gives every labeled extract that has no extract a new extract of the same name, linked in both
     * directions, and records the new extract in {@code allExtracts}.
     */
    private void addImplicitExtracts() {
        for (final LabeledExtract orphan : this.allLabeledExtracts) {
            if (!orphan.getExtracts().isEmpty()) {
                continue;
            }
            final Extract implicitExtract = new Extract();
            implicitExtract.setName(orphan.getName());
            implicitExtract.getLabeledExtracts().add(orphan);
            orphan.getExtracts().add(implicitExtract);
            this.allExtracts.add(implicitExtract);
        }
    }

    /**
     * Gives every extract that has no sample a new sample of the same name, linked in both
     * directions, and records the new sample in {@code allSamples}.
     */
    private void addImplicitSamples() {
        for (final Extract orphan : this.allExtracts) {
            if (!orphan.getSamples().isEmpty()) {
                continue;
            }
            final Sample implicitSample = new Sample();
            implicitSample.setName(orphan.getName());
            implicitSample.getExtracts().add(orphan);
            orphan.getSamples().add(implicitSample);
            this.allSamples.add(implicitSample);
        }
    }

    /**
     * Gives every sample that has no source a new source of the same name, linked in both
     * directions, and records the new source in {@code allSources}.
     */
    private void addImplicitSources() {
        for (final Sample orphan : this.allSamples) {
            if (!orphan.getSources().isEmpty()) {
                continue;
            }
            final Source implicitSource = new Source();
            implicitSource.setName(orphan.getName());
            implicitSource.getSamples().add(orphan);
            orphan.getSources().add(implicitSource);
            this.allSources.add(implicitSource);
        }
    }

    /**
     * Translates all node types of the SDRF document into caArray entities, one node type at a time:
     * biomaterials first (sources, samples, extracts, labeled extracts), then hybridizations, array
     * designs, images and finally raw and derived array data.
     *
     * @param document the SDRF document whose nodes are translated
     */
    private void translateNodesToEntities(SdrfDocument document) {
        // NOTE(review): later steps may rely on entities registered by earlier ones -- keep this order.
        translateSources(document);
        translateSamples(document);
        translateExtracts(document);
        translateLabeledExtracts(document);
        translateHybridizations(document);
        translateArrayDesigns(document);
        translateImages(document);
        translateRawArrayData(document);
        translateDerivedArrayData(document);
    }

    /**
     * Translates every SDRF source into a {@link Source}, reusing the source of the same name that
     * the project DAO returns for the experiment when there is one, and adds an organization for
     * each of its providers.
     *
     * @param document the SDRF document whose sources are translated
     */
    private void translateSources(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.Source sdrfSource : document.getAllSources()) {
            final Source existing = getProjectDao().getSourceForExperiment(this.experiment, sdrfSource.getName());
            final Source target = (existing != null) ? existing : new Source();
            translateBioMaterial(target, sdrfSource);
            for (final Provider provider : sdrfSource.getProviders()) {
                target.getProviders().add(getOrCreateOrganization(provider.getName()));
            }
            this.nodeTranslations.put(sdrfSource, target);
            this.allSources.add(target);
        }
    }

    /**
     * Translates every SDRF sample into a {@link Sample}, reusing the sample of the same name that
     * the project DAO returns for the experiment when there is one.
     *
     * @param document the SDRF document whose samples are translated
     */
    private void translateSamples(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.Sample sdrfSample : document.getAllSamples()) {
            final Sample existing = getProjectDao().getSampleForExperiment(this.experiment, sdrfSample.getName());
            final Sample target = (existing != null) ? existing : new Sample();
            translateBioMaterial(target, sdrfSample);
            this.nodeTranslations.put(sdrfSample, target);
            this.allSamples.add(target);
        }
    }

    /**
     * Translates every SDRF extract into an {@link Extract}, reusing the extract of the same name
     * that the project DAO returns for the experiment when there is one.
     *
     * @param document the SDRF document whose extracts are translated
     */
    private void translateExtracts(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.Extract sdrfExtract : document.getAllExtracts()) {
            final Extract existing = getProjectDao().getExtractForExperiment(this.experiment,
                    sdrfExtract.getName());
            final Extract target = (existing != null) ? existing : new Extract();
            translateBioMaterial(target, sdrfExtract);
            this.nodeTranslations.put(sdrfExtract, target);
            this.allExtracts.add(target);
        }
    }

    /**
     * Translates every SDRF labeled extract into a {@link LabeledExtract}, reusing the labeled
     * extract of the same name that the project DAO returns for the experiment when there is one,
     * and sets its label term.
     *
     * @param document the SDRF document whose labeled extracts are translated
     */
    private void translateLabeledExtracts(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.LabeledExtract sdrfLabeled : document
                .getAllLabeledExtracts()) {
            final LabeledExtract existing = getProjectDao().getLabeledExtractForExperiment(this.experiment,
                    sdrfLabeled.getName());
            final LabeledExtract target = (existing != null) ? existing : new LabeledExtract();
            translateBioMaterial(target, sdrfLabeled);
            // The label is set on new and on pre-existing labeled extracts alike.
            target.setLabel(getTerm(sdrfLabeled.getLabel()));
            this.nodeTranslations.put(sdrfLabeled, target);
            this.allLabeledExtracts.add(target);
        }
    }

    /**
     * Translates every SDRF hybridization into a {@link Hybridization}. A hybridization of the same
     * name already present in the experiment is reused unchanged; otherwise a new one is created
     * with its factor values and the protocol applications of the hybridization and its successor
     * scans.
     *
     * @param document the SDRF document whose hybridizations are translated
     */
    private void translateHybridizations(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.Hybridization sdrfHyb : document.getAllHybridizations()) {
            final String name = sdrfHyb.getName();
            Hybridization target = (this.experiment == null) ? null
                    : this.experiment.getHybridizationByName(name);

            // Existing hybridizations are deliberately left untouched.
            if (target == null) {
                target = new Hybridization();
                target.setName(name);
                for (final gov.nih.nci.caarray.magetab.sdrf.FactorValue sdrfValue : sdrfHyb.getFactorValues()) {
                    final AbstractFactorValue translatedValue = translateFactorValue(sdrfValue);
                    target.getFactorValues().add(translatedValue);
                    translatedValue.setHybridization(target);
                }
                associateProtocolApplications(target.getProtocolApplications(),
                        sdrfHyb.getProtocolApplications());
                associateProtocolApplications(target.getProtocolApplications(),
                        getAllProtocols(sdrfHyb.getSuccessorScans()));
            }

            this.allHybridizations.add(target);
            this.nodeTranslations.put(sdrfHyb, target);
        }
    }

    /**
     * Translates an SDRF factor value. A value with a term becomes a {@link TermBasedFactorValue};
     * a value with a unit that parses as a float becomes a {@link MeasurementFactorValue}; anything
     * else becomes a {@link UserDefinedFactorValue}. The result is linked to its factor in both
     * directions.
     *
     * @param sdrfFactorVal the SDRF factor value to translate
     * @return the translated factor value
     */
    @SuppressWarnings("PMD.EmptyCatchBlock")
    private AbstractFactorValue translateFactorValue(gov.nih.nci.caarray.magetab.sdrf.FactorValue sdrfFactorVal) {
        final Term unit = getTerm(sdrfFactorVal.getUnit());
        AbstractFactorValue translated;
        if (sdrfFactorVal.getTerm() != null) {
            translated = new TermBasedFactorValue(getTerm(sdrfFactorVal.getTerm()), unit);
        } else {
            translated = null;
            if (unit != null) {
                try {
                    translated = new MeasurementFactorValue(Float.valueOf(sdrfFactorVal.getValue()), unit);
                } catch (final NumberFormatException ignored) {
                    // Values with a unit that are not floats fall through to the user-defined case.
                }
            }
            if (translated == null) {
                translated = new UserDefinedFactorValue(sdrfFactorVal.getValue(), unit);
            }
        }

        final Factor owningFactor = getTranslationResult().getFactor(sdrfFactorVal.getFactor());
        translated.setFactor(owningFactor);
        owningFactor.getFactorValues().add(translated);
        return translated;
    }

    /**
     * Copies the data of an SDRF biomaterial onto a caArray biomaterial. Name, description and
     * protocol applications are only set on biomaterials without an id; material type and
     * characteristics are always processed.
     *
     * @param bioMaterial the caArray biomaterial to fill
     * @param sdrfBiomaterial the SDRF biomaterial providing the data
     */
    private void translateBioMaterial(AbstractBioMaterial bioMaterial,
            gov.nih.nci.caarray.magetab.sdrf.AbstractBioMaterial sdrfBiomaterial) {
        final boolean isNew = bioMaterial.getId() == null;
        if (isNew) {
            // Only a few properties of an existing biomaterial should be updated, so these are
            // restricted to new ones.
            bioMaterial.setName(sdrfBiomaterial.getName());
            bioMaterial.setDescription(sdrfBiomaterial.getDescription());
            for (final gov.nih.nci.caarray.magetab.ProtocolApplication sdrfApplication : sdrfBiomaterial
                    .getProtocolApplications()) {
                bioMaterial.addProtocolApplication(
                        getProtocolApplicationFromMageTabProtocolApplication(sdrfApplication));
            }
        }

        final Term materialType = getTerm(sdrfBiomaterial.getMaterialType());
        final boolean lacksSource = materialType != null && materialType.getSource() == null;
        if (lacksSource) {
            // A material type term without a source is assigned the MGED ontology source.
            materialType.setSource(this.vocabularyService.getSource(
                    ExperimentOntology.MGED_ONTOLOGY.getOntologyName(),
                    ExperimentOntology.MGED_ONTOLOGY.getVersion()));
        }
        bioMaterial.setMaterialType(materialType);

        for (final Characteristic each : sdrfBiomaterial.getCharacteristics()) {
            processSdrfCharacteristic(bioMaterial, each);
        }
    }

    /**
     * Applies one SDRF characteristic to a biomaterial. Organism part, cell type, disease state,
     * organism and external id categories are stored in dedicated biomaterial properties; any other
     * characteristic is added to the biomaterial's characteristics, replacing (and removing via the
     * project DAO) an existing characteristic of the same category.
     *
     * @param bioMaterial the biomaterial to update
     * @param sdrfCharacteristic the SDRF characteristic to apply
     */
    private void processSdrfCharacteristic(final AbstractBioMaterial bioMaterial,
            final Characteristic sdrfCharacteristic) {
        final AbstractCharacteristic translated = translateCharacteristic(sdrfCharacteristic);
        final String categoryName = translated.getCategory().getName();

        if (ExperimentOntologyCategory.ORGANISM_PART.getCategoryName().equals(categoryName)) {
            bioMaterial.setTissueSite(forceToTerm(translated));
            return;
        }
        if (ExperimentOntologyCategory.CELL_TYPE.getCategoryName().equals(categoryName)) {
            bioMaterial.setCellType(forceToTerm(translated));
            return;
        }
        if (ExperimentOntologyCategory.DISEASE_STATE.getCategoryName().equals(categoryName)) {
            bioMaterial.setDiseaseState(forceToTerm(translated));
            return;
        }
        if (ExperimentOntologyCategory.ORGANISM.getCategoryName().equals(categoryName)) {
            final Organism organism = getOrganism(forceToTerm(translated));
            if (organism.getTermSource() == null) {
                // Organisms without a term source are assigned the NCBI source.
                organism.setTermSource(this.vocabularyService.getSource(ExperimentOntology.NCBI.getOntologyName(),
                        ExperimentOntology.NCBI.getVersion()));
            }
            bioMaterial.setOrganism(organism);
            return;
        }

        // External id categories are matched case-insensitively with all whitespace removed.
        final String compactName = categoryName.replaceAll("\\s", "");
        final boolean isExternalId = ExperimentOntologyCategory.EXTERNAL_SAMPLE_ID.getCategoryName()
                .equalsIgnoreCase(compactName)
                || ExperimentOntologyCategory.EXTERNAL_ID.getCategoryName().equalsIgnoreCase(compactName);
        if (isExternalId) {
            bioMaterial.setExternalId(sdrfCharacteristic.getValue());
            return;
        }

        // Generic characteristic: at most one existing characteristic of the same category is replaced.
        AbstractCharacteristic superseded = null;
        for (final AbstractCharacteristic current : bioMaterial.getCharacteristics()) {
            if (current.getCategory().equals(translated.getCategory())) {
                superseded = current;
                break;
            }
        }
        if (superseded != null) {
            bioMaterial.getCharacteristics().remove(superseded);
            getProjectDao().remove(superseded);
        }
        bioMaterial.getCharacteristics().add(translated);
        translated.setBioMaterial(bioMaterial);
    }

    /**
     * Returns a term for the given characteristic. A term based characteristic yields its own term;
     * for any other characteristic a term is produced from its display value (without unit) by
     * running a temporary SDRF ontology term through the term translator.
     *
     * @param characteristic the characteristic to convert
     * @return the characteristic's term, or the term translated from its display value
     */
    private Term forceToTerm(AbstractCharacteristic characteristic) {
        if (characteristic instanceof TermBasedCharacteristic) {
            final TermBasedCharacteristic termBased = (TermBasedCharacteristic) characteristic;
            return termBased.getTerm();
        }
        // Not term based: wrap the display value in a placeholder SDRF term and translate that.
        final OntologyTerm placeholder = new OntologyTerm();
        placeholder.setValue(characteristic.getDisplayValueWithoutUnit());
        this.termTranslator.translateTerm(placeholder);
        return getTerm(placeholder);
    }

    /**
     * Looks up the "unknown protocol type" term in the MGED ontology term source.
     *
     * @return the term the vocabulary service returns for
     *         {@link VocabularyService#UNKNOWN_PROTOCOL_TYPE_NAME}
     */
    private Term getUnknownProtocolType() {
        final TermSource mgedSource = this.vocabularyService.getSource(
                ExperimentOntology.MGED_ONTOLOGY.getOntologyName(),
                ExperimentOntology.MGED_ONTOLOGY.getVersion());
        return this.vocabularyService.getTerm(mgedSource, VocabularyService.UNKNOWN_PROTOCOL_TYPE_NAME);
    }

    /**
     * Resolves a protocol to an already known instance with the same name and source: first among
     * the protocols registered during this import, then in the database. When neither has it, the
     * given protocol is registered as newly imported and returned.
     *
     * @param p the candidate protocol
     * @return the known protocol with the same name and source, or {@code p} itself
     */
    private Protocol replaceProtocolIfExists(Protocol p) {
        final ProtocolKey key = new ProtocolKey(p.getName(), p.getSource());

        final Protocol alreadyImported = this.importedProtocolMap.get(key);
        if (alreadyImported != null) {
            return alreadyImported;
        }

        final Protocol persisted = getDaoFactory().getProtocolDao().getProtocol(p.getName(), p.getSource());
        if (persisted != null) {
            return persisted;
        }

        // Unknown everywhere: it is a new protocol, remember it for later lookups.
        this.importedProtocolMap.put(key, p);
        return p;
    }

    /**
     * Returns the organism for the given term. Organisms created earlier by this translator are
     * reused; otherwise, when the term's source has an id, the organism is looked up through the
     * vocabulary service. If no organism is found a new one is created from the term's value and
     * source (which may be null) and cached for later calls.
     *
     * @param term the organism term
     * @return the cached, looked up, or newly created organism; never null
     */
    private Organism getOrganism(Term term) {
        Organism o = this.termToOrganism.get(term);
        // A term may have no source at all (callers default a missing organism term source afterwards),
        // so the source has to be null-checked before its id is inspected.
        if (o == null && term.getSource() != null && term.getSource().getId() != null) {
            o = this.vocabularyService.getOrganism(term.getSource(), term.getValue());
        }
        if (o == null) {
            o = new Organism();
            o.setScientificName(term.getValue());
            o.setTermSource(term.getSource());
            this.termToOrganism.put(term, o);
        }
        return o;
    }

    /**
     * Builds a caArray protocol from a MAGE-TAB protocol and resolves it against the protocols
     * already known. A missing type defaults to the unknown protocol type, a missing term source to
     * the MGED ontology source.
     *
     * @param mageTabProtocol the MAGE-TAB protocol to convert
     * @return the matching known protocol, or the newly built one
     */
    private Protocol getProtocolFromMageTabProtocol(gov.nih.nci.caarray.magetab.Protocol mageTabProtocol) {
        final Term declaredType = getTerm(mageTabProtocol.getType());
        final Term effectiveType = (declaredType != null) ? declaredType : getUnknownProtocolType();

        final TermSource effectiveSource;
        if (mageTabProtocol.getTermSource() == null) {
            effectiveSource = this.vocabularyService.getSource(ExperimentOntology.MGED_ONTOLOGY.getOntologyName(),
                    ExperimentOntology.MGED_ONTOLOGY.getVersion());
        } else {
            effectiveSource = getTranslationResult().getSource(mageTabProtocol.getTermSource());
        }

        final Protocol candidate = new Protocol(mageTabProtocol.getName(), effectiveType, effectiveSource);
        candidate.setContact(mageTabProtocol.getContact());
        candidate.setDescription(mageTabProtocol.getDescription());
        candidate.setHardware(mageTabProtocol.getHardware());
        candidate.setSoftware(mageTabProtocol.getSoftware());
        return replaceProtocolIfExists(candidate);
    }

    /**
     * Builds a caArray protocol application from a MAGE-TAB protocol application, translating each
     * parameter value: a value with a term becomes term based, a value with a unit that parses as a
     * float becomes a measurement, anything else becomes user defined.
     *
     * @param mageTabProtocolApplication the MAGE-TAB protocol application to convert
     * @return the new protocol application with its protocol and parameter values set
     */
    @SuppressWarnings("PMD.EmptyCatchBlock")
    private ProtocolApplication getProtocolApplicationFromMageTabProtocolApplication(
            gov.nih.nci.caarray.magetab.ProtocolApplication mageTabProtocolApplication) {
        final Protocol protocol = getProtocolFromMageTabProtocol(mageTabProtocolApplication.getProtocol());
        final ProtocolApplication application = new ProtocolApplication();
        application.setProtocol(protocol);

        for (final gov.nih.nci.caarray.magetab.ParameterValue sdrfValue : mageTabProtocolApplication
                .getParameterValues()) {
            final Term unit = getTerm(sdrfValue.getUnit());
            AbstractParameterValue translated;
            if (sdrfValue.getTerm() != null) {
                translated = new TermBasedParameterValue(getTerm(sdrfValue.getTerm()), unit);
            } else {
                translated = null;
                if (unit != null) {
                    try {
                        translated = new MeasurementParameterValue(Float.valueOf(sdrfValue.getValue()), unit);
                    } catch (final NumberFormatException ignored) {
                        // Values with a unit that are not floats fall through to the user-defined case.
                    }
                }
                if (translated == null) {
                    translated = new UserDefinedParameterValue(sdrfValue.getValue(), unit);
                }
            }

            // The parameter is optional; a value without one is still attached to the application.
            if (sdrfValue.getParameter() != null) {
                translated.setParameter(getOrCreateParameter(sdrfValue.getParameter().getName(), protocol));
            }
            translated.setProtocolApplication(application);
            application.getValues().add(translated);
        }
        return application;
    }

    /**
     * Resolves the parameter with the given name for a protocol. The translator's own parameter map is
     * consulted first, then the protocol DAO; only when both miss is a new parameter created and remembered
     * in the map.
     *
     * @param name the parameter name
     * @param protocol the protocol the parameter belongs to
     * @return the cached, persisted or newly created parameter
     */
    private Parameter getOrCreateParameter(String name, Protocol protocol) {
        final Parameter cached = (Parameter) this.paramMap.get(name, protocol);
        if (cached != null) {
            return cached;
        }
        final Parameter persisted = this.getDaoFactory().getProtocolDao().getParameter(name, protocol);
        if (persisted != null) {
            return persisted;
        }
        final Parameter created = new Parameter(name, protocol);
        this.paramMap.put(name, protocol, created);
        return created;
    }

    /**
     * Translates an SDRF characteristic into the matching caArray characteristic. A characteristic that
     * carries a term becomes term-based. Otherwise, a value that has a unit and parses as a float becomes a
     * measurement, and everything else (including a missing value) is kept as a user-defined characteristic.
     *
     * @param sdrfCharacteristic the SDRF characteristic to translate
     * @return the translated characteristic, never null
     */
    @SuppressWarnings("PMD.EmptyCatchBlock")
    private AbstractCharacteristic translateCharacteristic(Characteristic sdrfCharacteristic) {
        final Category category = TermTranslator.getOrCreateCategory(this.vocabularyService,
                this.getTranslationResult(), sdrfCharacteristic.getCategory());

        final Term unit = getTerm(sdrfCharacteristic.getUnit());
        AbstractCharacteristic chr = null;
        if (sdrfCharacteristic.getTerm() != null) {
            chr = new TermBasedCharacteristic(category, getTerm(sdrfCharacteristic.getTerm()), unit);
        } else {
            final String rawValue = sdrfCharacteristic.getValue();
            // Float.valueOf(null) throws NullPointerException rather than NumberFormatException, so a
            // missing value must be excluded here to reach the user-defined fallback below.
            if (unit != null && rawValue != null) {
                try {
                    chr = new MeasurementCharacteristic(category, Float.valueOf(rawValue), unit);
                } catch (final NumberFormatException ignored) {
                    // non-Float values that have Units will end up as UserDefined
                }
            }
            if (chr == null) {
                chr = new UserDefinedCharacteristic(category, rawValue, unit);
            }
        }
        return chr;
    }

    /**
     * Adds every array design referenced by the SDRF document to the translation result and, when at least
     * one investigation has been translated, to the first investigation as well. References that cannot be
     * resolved to an array design are skipped so that no null entries end up in those collections.
     *
     * @param document the SDRF document whose array design references are translated
     */
    private void translateArrayDesigns(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.ArrayDesign sdrfArrayDesign : document.getAllArrayDesigns()) {
            final ArrayDesign arrayDesign = getArrayDesign(sdrfArrayDesign);
            if (arrayDesign == null) {
                // The lookup returns null when no matching design exists; there is nothing to register.
                continue;
            }
            getTranslationResult().getArrayDesigns().add(arrayDesign);
            if (!getTranslationResult().getInvestigations().isEmpty()) {
                getTranslationResult().getInvestigations().iterator().next().getArrayDesigns().add(arrayDesign);
            }
        }
    }

    /**
     * Get a caArray ArrayDesign object from an MAGETAB ArrayDesign. The MAGETAB array design's value is passed
     * to {@link #processArrayDesignRef(String)}, which uses it as the LSID of the design to look up.
     *
     * @param sdrfArrayDesign MAGETAB array design - must not be null
     * @return the matching caArray array design, or null if the lookup finds no match
     */
    private ArrayDesign getArrayDesign(gov.nih.nci.caarray.magetab.sdrf.ArrayDesign sdrfArrayDesign) {
        return processArrayDesignRef(sdrfArrayDesign.getValue());
    }

    /**
     * Resolves an array design reference by building an example design carrying the given LSID and querying
     * the array DAO with it.
     *
     * @param arrayDesignName the reference, applied to the example design as its LSID
     * @return the first design returned by the query, or null when the query returns nothing
     */
    private ArrayDesign processArrayDesignRef(String arrayDesignName) {
        final ArrayDesign example = new ArrayDesign();
        example.setLsidForEntity(arrayDesignName);
        final List<ArrayDesign> matches = getDaoFactory().getArrayDao().queryEntityByExample(example);
        return matches.isEmpty() ? null : matches.get(0);
    }

    /**
     * Creates a caArray image for each SDRF image node whose file can be found, and records the result in
     * the node translation map. Image nodes without a resolvable file are left untranslated.
     *
     * @param document the SDRF document whose image nodes are translated
     */
    private void translateImages(SdrfDocument document) {
        for (final gov.nih.nci.caarray.magetab.sdrf.Image imageNode : document.getAllImages()) {
            final String name = imageNode.getName();
            final CaArrayFile file = getFile(name);
            if (file == null) {
                continue;
            }
            final Image translated = new Image();
            translated.setName(name);
            translated.setImageFile(file);
            this.nodeTranslations.put(imageNode, translated);
        }
    }

    /**
     * Translates all raw data nodes of the SDRF document: first the native array data files, then the
     * MAGE-TAB array data matrix files.
     *
     * @param document the SDRF document whose raw data nodes are translated
     */
    private void translateRawArrayData(SdrfDocument document) {
        // Native raw data files.
        for (final gov.nih.nci.caarray.magetab.sdrf.ArrayDataFile nativeFileNode : document
                .getAllArrayDataFiles()) {
            translateIndividualRawArrayDataFile(nativeFileNode, false);
        }
        // Raw data matrix files.
        for (final gov.nih.nci.caarray.magetab.sdrf.ArrayDataMatrixFile matrixFileNode : document
                .getAllArrayDataMatrixFiles()) {
            translateIndividualRawArrayDataFile(matrixFileNode, true);
        }
    }

    /**
     * Translates a single raw data node. If the referenced file is unknown the node is skipped. If the file
     * has already been imported, the existing array data is looked up by the file id; otherwise new raw array
     * data is created and given the protocol applications gathered from the node itself, from the
     * normalizations preceding its successor derived data files, and from its successor normalizations.
     * In both cases the result is recorded exactly once in the node translation map.
     *
     * @param sdrfData the SDRF raw data node (native file or data matrix)
     * @param isMatrix whether the node is a data matrix file; not used by this method
     */
    private void translateIndividualRawArrayDataFile(
            gov.nih.nci.caarray.magetab.sdrf.AbstractSampleDataRelationshipNode sdrfData, boolean isMatrix) {
        final String fileName = sdrfData.getName();
        final CaArrayFile dataFile = getFile(fileName);
        if (dataFile == null) {
            // The file could be coming in a future import, possibly due to import splitting.
            return;
        }
        final RawArrayData caArrayData;
        if (EnumSet.of(FileStatus.IMPORTED, FileStatus.IMPORTED_NOT_PARSED).contains(dataFile.getFileStatus())) {
            // this is a re-import referencing an existing data file
            caArrayData = (RawArrayData) getDaoFactory().getArrayDao().getArrayData(dataFile.getId());
        } else {
            caArrayData = new RawArrayData();
            caArrayData.setName(fileName);
            caArrayData.setDataFile(dataFile);

            final Set<gov.nih.nci.caarray.magetab.ProtocolApplication> all =
                    new HashSet<gov.nih.nci.caarray.magetab.ProtocolApplication>();
            all.addAll(sdrfData.getProtocolApplications());
            for (final DerivedArrayDataFile df : sdrfData.getSuccessorDerivedArrayDataFiles()) {
                all.addAll(getAllProtocols(df.getPredecessorNormalizations()));
            }
            for (final DerivedArrayDataMatrixFile df : sdrfData.getSuccessorDerivedArrayDataMatrixFiles()) {
                all.addAll(getAllProtocols(df.getPredecessorNormalizations()));
            }
            all.addAll(getAllProtocols(sdrfData.getSuccessorNormalizations()));

            associateProtocolApplications(caArrayData.getProtocolApplications(), all);
        }
        this.nodeTranslations.put(sdrfData, caArrayData);
    }

    /**
     * Translates all derived data nodes of the SDRF document: first the native derived data files, then the
     * MAGE-TAB derived data matrix files.
     *
     * @param document the SDRF document whose derived data nodes are translated
     */
    private void translateDerivedArrayData(SdrfDocument document) {
        // Native derived data files.
        for (final gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataFile nativeFileNode : document
                .getAllDerivedArrayDataFiles()) {
            translateIndividualDerivedArrayDataFile(nativeFileNode, false);
        }
        // Derived data matrix files.
        for (final gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataMatrixFile matrixFileNode : document
                .getAllDerivedArrayDataMatrixFiles()) {
            translateIndividualDerivedArrayDataFile(matrixFileNode, true);
        }
    }

    /**
     * Translates a single derived data node. A node whose file is unknown is skipped. For a file that has
     * already been imported the existing array data is looked up by the file id; otherwise new derived array
     * data is created with the node's protocol applications. The data is then connected to the data it was
     * derived from and recorded in the node translation map.
     *
     * @param sdrfData the SDRF derived data node (native file or data matrix)
     * @param isDataMatrix whether the node is a data matrix file; not used by this method
     */
    private void translateIndividualDerivedArrayDataFile(
            gov.nih.nci.caarray.magetab.sdrf.AbstractSampleDataRelationshipNode sdrfData, boolean isDataMatrix) {
        final String name = sdrfData.getName();
        final CaArrayFile file = getFile(name);
        if (file == null) {
            // Possibly part of a later import (e.g. when an import has been split).
            return;
        }
        final boolean alreadyImported = EnumSet.of(FileStatus.IMPORTED, FileStatus.IMPORTED_NOT_PARSED)
                .contains(file.getFileStatus());
        final DerivedArrayData derived;
        if (!alreadyImported) {
            derived = new DerivedArrayData();
            derived.setName(name);
            derived.setDataFile(file);
            associateProtocolApplications(derived.getProtocolApplications(), sdrfData.getProtocolApplications());
        } else {
            derived = (DerivedArrayData) getDaoFactory().getArrayDao().getArrayData(file.getId());
        }
        setDerivedFromData(sdrfData, derived);
        this.nodeTranslations.put(sdrfData, derived);
    }

    /**
     * Registers, on the given derived data, every already-translated predecessor data node of the SDRF node.
     * Predecessors are considered in the order: raw files, raw matrix files, derived files, derived matrix
     * files. Predecessors without a translation are ignored.
     *
     * @param sdrfData the SDRF node whose predecessors are inspected
     * @param caArrayData the derived data that receives the "derived from" entries
     */
    private void setDerivedFromData(gov.nih.nci.caarray.magetab.sdrf.AbstractSampleDataRelationshipNode sdrfData,
            DerivedArrayData caArrayData) {
        final List<AbstractSampleDataRelationshipNode> predecessors =
                new ArrayList<AbstractSampleDataRelationshipNode>();
        predecessors.addAll(sdrfData.getPredecessorArrayDataFiles());
        predecessors.addAll(sdrfData.getPredecessorArrayDataMatrixFiles());
        predecessors.addAll(sdrfData.getPredecessorDerivedArrayDataFiles());
        predecessors.addAll(sdrfData.getPredecessorDerivedArrayDataMatrixFiles());

        for (final AbstractSampleDataRelationshipNode predecessor : predecessors) {
            final AbstractArrayData translated = (AbstractArrayData) this.nodeTranslations.get(predecessor);
            if (translated == null) {
                continue;
            }
            caArrayData.getDerivedFromArrayDataCollection().add(translated);
        }
    }

    /**
     * Translates each MAGE-TAB protocol application and appends the result to the destination collection.
     *
     * @param dest the collection receiving the translated protocol applications
     * @param sdrfProtocolapplications the MAGE-TAB protocol applications to translate
     */
    private void associateProtocolApplications(Collection<ProtocolApplication> dest,
            Collection<gov.nih.nci.caarray.magetab.ProtocolApplication> sdrfProtocolapplications) {
        for (final gov.nih.nci.caarray.magetab.ProtocolApplication source : sdrfProtocolapplications) {
            dest.add(getProtocolApplicationFromMageTabProtocolApplication(source));
        }
    }

    /**
     * Gathers the protocol applications of all given nodes into a single set.
     *
     * @param nodes the nodes whose protocol applications are collected
     * @return a new set containing every protocol application found on the nodes
     */
    private static Set<gov.nih.nci.caarray.magetab.ProtocolApplication> getAllProtocols(
            Set<? extends AbstractSampleDataRelationshipNode> nodes) {
        final HashSet<gov.nih.nci.caarray.magetab.ProtocolApplication> collected =
                new HashSet<gov.nih.nci.caarray.magetab.ProtocolApplication>();
        for (final AbstractSampleDataRelationshipNode node : nodes) {
            collected.addAll(node.getProtocolApplications());
        }
        return collected;
    }

    /**
     * Links the translated node graph, starting a recursive walk from each leftmost node of the document.
     *
     * @param document the SDRF document whose nodes are linked
     */
    private void linkNodes(SdrfDocument document) {
        for (final AbstractSampleDataRelationshipNode startNode : document.getLeftmostNodes()) {
            linkNode(startNode);
        }
    }

    /**
     * Recursively links a node to its successors, visiting each node at most once.
     * Does not handle <code>Comment</code> entities.
     * Assumes that nodes occur in the order: Source, Sample, Extract, LabeledExtract, Hybridization,
     * Scan, Raw Data, Normalization, Derived Data. Any of these nodes are optional, and Image
     * can occur anywhere after Hybridization.
     *
     * @param node the node to link with its successors
     */
    private void linkNode(AbstractSampleDataRelationshipNode node) {
        if (Boolean.TRUE.equals(this.isNodeLinked.get(node))) {
            // Already handled on an earlier visit.
            return;
        }
        for (final AbstractSampleDataRelationshipNode next : node.getSuccessors()) {
            // Successors are linked depth-first before being attached to this node.
            linkNode(next);
            linkTwoNodes(node, next);
        }
        // Remember that this node is done so later visits return immediately.
        this.isNodeLinked.put(node, Boolean.TRUE);
    }

    /**
     * Links a node with one successor. Biomaterial nodes are linked to their successor (generating any
     * missing intermediate biomaterials), and hybridization nodes are linked to their arrays, images and
     * array data. Other node types - Image, Scan, Raw/Derived Data, Normalization - are ignored here.
     *
     * @param leftNode the SDRF node on the left side of the link
     * @param rightNode the SDRF successor node on the right side of the link
     */
    private void linkTwoNodes(AbstractSampleDataRelationshipNode leftNode,
            AbstractSampleDataRelationshipNode rightNode) {
        final AbstractCaArrayObject leftCaArrayNode = this.nodeTranslations.get(leftNode);
        final AbstractCaArrayObject rightCaArrayNode = this.nodeTranslations.get(rightNode);
        final SdrfNodeType leftNodeType = leftNode.getNodeType();
        final SdrfNodeType rightNodeType = rightNode.getNodeType();
        // if either node is null, it means it wasn't translated (because it was an update of existing data and that
        // type of node doesn't get updated), so don't try to link it
        if (leftCaArrayNode == null) {
            // Both branches below dereference the left node, so an untranslated left node cannot be linked.
            return;
        }
        if (isBioMaterial(leftNodeType) && rightCaArrayNode != null) {
            // Use the left node's name as part of any generated biomaterial names.
            final String baseGeneratedNodeName = ((AbstractBioMaterial) leftCaArrayNode).getName();
            final List<ProtocolApplication> pas = ((AbstractBioMaterial) leftCaArrayNode).getProtocolApplications();
            linkBioMaterial(leftCaArrayNode, rightCaArrayNode, leftNodeType, rightNodeType, baseGeneratedNodeName,
                    pas);
        } else if (SdrfNodeType.HYBRIDIZATION.equals(leftNodeType)) {
            final Hybridization hybridization = (Hybridization) leftCaArrayNode;
            linkHybridizationToArrays((gov.nih.nci.caarray.magetab.sdrf.Hybridization) leftNode, hybridization);
            linkHybridizationToImages((gov.nih.nci.caarray.magetab.sdrf.Hybridization) leftNode, hybridization);
            linkHybridizationToArrayData((gov.nih.nci.caarray.magetab.sdrf.Hybridization) leftNode, hybridization);
        }
    }

    /**
     * Makes sure the hybridization has an array and, when the SDRF hybridization references a resolvable
     * array design, that the array carries that design. An existing array is replaced only when it has no
     * design or when its design's LSID differs from the referenced one.
     *
     * @param sdrfHybridization the SDRF hybridization node
     * @param hybridization the translated caArray hybridization
     */
    private void linkHybridizationToArrays(gov.nih.nci.caarray.magetab.sdrf.Hybridization sdrfHybridization,
            Hybridization hybridization) {
        final Array array = new Array();
        final Array currArray = hybridization.getArray();
        // a new hyb should always have an array, even if the array isn't associated with a design
        if (currArray == null) {
            hybridization.setArray(array);
        }

        // if the sdrf hyb has an array design, only associate it with the array if the current hyb
        // doesn't have an array or if the new design is different from the old one
        if (sdrfHybridization.getArrayDesign() != null) {
            final ArrayDesign sdrfArrayDesign = getArrayDesign(sdrfHybridization.getArrayDesign());
            // An existing array may have no design at all; treat that like a differing design instead of
            // dereferencing null.
            final ArrayDesign currDesign = (currArray == null) ? null : currArray.getDesign();
            if (sdrfArrayDesign != null
                    && (currDesign == null || !currDesign.getLsid().equals(sdrfArrayDesign.getLsid()))) {
                array.setDesign(sdrfArrayDesign);
                hybridization.setArray(array);
            }
        }
    }

    /**
     * Adds the translated image of every successor image node to the hybridization. Image nodes that were
     * not translated are skipped.
     *
     * @param sdrfHybridization the SDRF hybridization node
     * @param hybridization the translated caArray hybridization
     */
    private void linkHybridizationToImages(gov.nih.nci.caarray.magetab.sdrf.Hybridization sdrfHybridization,
            Hybridization hybridization) {
        for (final gov.nih.nci.caarray.magetab.sdrf.Image imageNode : sdrfHybridization.getSuccessorImages()) {
            final Image translated = (Image) this.nodeTranslations.get(imageNode);
            if (translated == null) {
                continue;
            }
            hybridization.getImages().add(translated);
        }
    }

    /**
     * Connects the hybridization with the translated raw and derived array data of its successor data nodes,
     * in both directions. Successors are processed in the order: raw files, raw matrix files, derived files,
     * derived matrix files. Data nodes that were not translated are skipped.
     *
     * @param sdrfHybridization the SDRF hybridization node
     * @param hybridization the translated caArray hybridization
     */
    private void linkHybridizationToArrayData(gov.nih.nci.caarray.magetab.sdrf.Hybridization sdrfHybridization,
            Hybridization hybridization) {
        // Raw data: native files.
        for (final gov.nih.nci.caarray.magetab.sdrf.ArrayDataFile rawFileNode : sdrfHybridization
                .getSuccessorArrayDataFiles()) {
            final RawArrayData raw = (RawArrayData) this.nodeTranslations.get(rawFileNode);
            if (raw == null) {
                continue;
            }
            raw.addHybridization(hybridization);
            hybridization.getRawDataCollection().add(raw);
        }
        // Raw data: matrix files.
        for (final gov.nih.nci.caarray.magetab.sdrf.ArrayDataMatrixFile rawMatrixNode : sdrfHybridization
                .getSuccessorArrayDataMatrixFiles()) {
            final RawArrayData raw = (RawArrayData) this.nodeTranslations.get(rawMatrixNode);
            if (raw == null) {
                continue;
            }
            raw.addHybridization(hybridization);
            hybridization.getRawDataCollection().add(raw);
        }
        // Derived data: native files.
        for (final gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataFile derivedFileNode : sdrfHybridization
                .getSuccessorDerivedArrayDataFiles()) {
            final DerivedArrayData derived = (DerivedArrayData) this.nodeTranslations.get(derivedFileNode);
            if (derived == null) {
                continue;
            }
            derived.addHybridization(hybridization);
            hybridization.getDerivedDataCollection().add(derived);
        }
        // Derived data: matrix files.
        for (final gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataMatrixFile derivedMatrixNode : sdrfHybridization
                .getSuccessorDerivedArrayDataMatrixFiles()) {
            final DerivedArrayData derived = (DerivedArrayData) this.nodeTranslations.get(derivedMatrixNode);
            if (derived == null) {
                continue;
            }
            derived.addHybridization(hybridization);
            hybridization.getDerivedDataCollection().add(derived);
        }
    }

    /**
     * Links a biomaterial node with one successor. When the successor is not the next stage in the chain
     * Source -> Sample -> Extract -> LabeledExtract -> Hybridization, the missing intermediate node is
     * generated, matching protocol applications are moved onto it, and linking continues recursively from
     * the generated node. How many nodes get generated is driven by the left side of the graph: one Source
     * feeding three Extracts yields a single generated Sample, while three Sources feeding one Extract yield
     * three generated Samples.
     *
     * @param leftCaArrayNode the translated left node
     * @param rightCaArrayNode the translated successor node
     * @param leftNodeType the SDRF type of the left node
     * @param rightNodeType the SDRF type of the successor node
     * @param baseGeneratedNodeName the name used as the base for names of generated nodes
     * @param protocolApplications the protocol applications that may be moved onto generated nodes
     */
    @SuppressWarnings("PMD")
    private void linkBioMaterial(AbstractCaArrayObject leftCaArrayNode, AbstractCaArrayObject rightCaArrayNode,
            SdrfNodeType leftNodeType, SdrfNodeType rightNodeType, String baseGeneratedNodeName,
            Collection<ProtocolApplication> protocolApplications) {
        if (leftNodeType.equals(SdrfNodeType.SOURCE)) {
            if (rightNodeType.equals(SdrfNodeType.SAMPLE)) {
                linkSourceAndSample((Source) leftCaArrayNode, (Sample) rightCaArrayNode);
                return;
            }
            // No sample in between: generate one and continue from it.
            final Sample bridgeSample = generateSampleAndLink(baseGeneratedNodeName, (Source) leftCaArrayNode);
            reassociateProtocolApplications(bridgeSample, protocolApplications);
            linkBioMaterial(bridgeSample, rightCaArrayNode, SdrfNodeType.SAMPLE, rightNodeType,
                    baseGeneratedNodeName, protocolApplications);
            return;
        }
        if (leftNodeType.equals(SdrfNodeType.SAMPLE)) {
            if (rightNodeType.equals(SdrfNodeType.EXTRACT)) {
                linkSampleAndExtract((Sample) leftCaArrayNode, (Extract) rightCaArrayNode);
                return;
            }
            // No extract in between: generate one and continue from it.
            final Extract bridgeExtract = generateExtractAndLink(baseGeneratedNodeName, (Sample) leftCaArrayNode);
            reassociateProtocolApplications(bridgeExtract, protocolApplications);
            linkBioMaterial(bridgeExtract, rightCaArrayNode, SdrfNodeType.EXTRACT, rightNodeType,
                    baseGeneratedNodeName, protocolApplications);
            return;
        }
        if (leftNodeType.equals(SdrfNodeType.EXTRACT)) {
            if (rightNodeType.equals(SdrfNodeType.LABELED_EXTRACT)) {
                linkExtractAndLabeledExtract((Extract) leftCaArrayNode, (LabeledExtract) rightCaArrayNode);
                return;
            }
            // No labeled extract in between: generate one and continue from it.
            final LabeledExtract bridgeLabeledExtract = generateLabeledExtractAndLink(baseGeneratedNodeName,
                    (Extract) leftCaArrayNode);
            reassociateProtocolApplications(bridgeLabeledExtract, protocolApplications);
            linkBioMaterial(bridgeLabeledExtract, rightCaArrayNode, SdrfNodeType.LABELED_EXTRACT, rightNodeType,
                    baseGeneratedNodeName, protocolApplications);
            return;
        }
        if (leftNodeType.equals(SdrfNodeType.LABELED_EXTRACT)
                && rightNodeType.equals(SdrfNodeType.HYBRIDIZATION)) {
            linkLabeledExtractAndHybridization((LabeledExtract) leftCaArrayNode, (Hybridization) rightCaArrayNode);
        }
    }

    /**
     * Moves protocol applications onto the given biomaterial. A protocol application is moved when its
     * protocol type matches a {@link ProtocolTypeAssociation} by value and term source name, and that
     * association's node class is the class of the biomaterial. Moved applications are removed from the
     * supplied collection through its iterator.
     *
     * @param bioMaterial the biomaterial that receives matching protocol applications
     * @param protocolApplications the candidate protocol applications; matching entries are removed from it
     */
    private void reassociateProtocolApplications(AbstractBioMaterial bioMaterial,
            Collection<ProtocolApplication> protocolApplications) {
        for (final Iterator<ProtocolApplication> i = protocolApplications.iterator(); i.hasNext();) {
            final ProtocolApplication pa = i.next();
            final Term protocolType = pa.getProtocol().getType();
            for (final ProtocolTypeAssociation typeAssoc : ProtocolTypeAssociation.values()) {
                if (protocolType.getValue().equals(typeAssoc.getValue())
                        && protocolType.getSource().getName().equals(typeAssoc.getSource())
                        && bioMaterial.getClass().equals(typeAssoc.getNodeClass())) {
                    bioMaterial.addProtocolApplication(pa);
                    i.remove();
                    // The application has been moved; a second match would add it twice and make
                    // Iterator.remove() throw IllegalStateException.
                    break;
                }
            }
        }
    }

    /**
     * Obtains the generated sample for the given base name and links it to the source. The sample is taken
     * from the generated-node map if present; otherwise it is looked up in the experiment through the project
     * DAO or, failing that, newly created. A sample obtained this way is stored in the generated-node map
     * and added to the collection of all samples.
     *
     * @param baseGeneratedNodeName the base name appended to the generated sample prefix
     * @param source the source to link the sample to
     * @return the generated (or reused) sample
     */
    private Sample generateSampleAndLink(String baseGeneratedNodeName, Source source) {
        final String name = GENERATED_SAMPLE_PREFIX + baseGeneratedNodeName;
        Sample sample = (Sample) this.generatedNodes.get(name);
        if (sample == null) {
            final Sample existing = getProjectDao().getSampleForExperiment(this.experiment, name);
            if (existing != null) {
                sample = existing;
            } else {
                sample = new Sample();
                sample.setName(name);
            }
            this.generatedNodes.put(name, sample);
            this.allSamples.add(sample);
        }
        linkSourceAndSample(source, sample);
        return sample;
    }

    /**
     * Obtains the generated extract for the given base name and links it to the sample. The extract is taken
     * from the generated-node map if present; otherwise it is looked up in the experiment through the project
     * DAO or, failing that, newly created. An extract obtained this way is stored in the generated-node map
     * and added to the collection of all extracts.
     *
     * @param baseGeneratedNodeName the base name appended to the generated extract prefix
     * @param generatedSample the sample to link the extract to
     * @return the generated (or reused) extract
     */
    private Extract generateExtractAndLink(String baseGeneratedNodeName, Sample generatedSample) {
        final String name = GENERATED_EXTRACT_PREFIX + baseGeneratedNodeName;
        Extract extract = (Extract) this.generatedNodes.get(name);
        if (extract == null) {
            final Extract existing = getProjectDao().getExtractForExperiment(this.experiment, name);
            if (existing != null) {
                extract = existing;
            } else {
                extract = new Extract();
                extract.setName(name);
            }
            this.generatedNodes.put(name, extract);
            this.allExtracts.add(extract);
        }
        linkSampleAndExtract(generatedSample, extract);
        return extract;
    }

    /**
     * Obtains the generated labeled extract for the given base name and links it to the extract. The labeled
     * extract is taken from the generated-node map if present; otherwise it is looked up in the experiment
     * through the project DAO or, failing that, newly created. A labeled extract obtained this way is stored
     * in the generated-node map and added to the collection of all labeled extracts.
     *
     * @param baseGeneratedNodeName the base name appended to the generated labeled extract prefix
     * @param generatedExtract the extract to link the labeled extract to
     * @return the generated (or reused) labeled extract
     */
    private LabeledExtract generateLabeledExtractAndLink(String baseGeneratedNodeName, Extract generatedExtract) {
        final String name = GENERATED_LABELED_EXTRACT_PREFIX + baseGeneratedNodeName;
        LabeledExtract labeledExtract = (LabeledExtract) this.generatedNodes.get(name);
        if (labeledExtract == null) {
            final LabeledExtract existing = getProjectDao().getLabeledExtractForExperiment(this.experiment, name);
            if (existing != null) {
                labeledExtract = existing;
            } else {
                labeledExtract = new LabeledExtract();
                labeledExtract.setName(name);
            }
            this.generatedNodes.put(name, labeledExtract);
            this.allLabeledExtracts.add(labeledExtract);
        }
        linkExtractAndLabeledExtract(generatedExtract, labeledExtract);
        return labeledExtract;
    }

    /**
     * Links a source and a sample in both directions: the sample is added to the source's samples and the
     * source is added to the sample's sources.
     *
     * @param source the source side of the link
     * @param sample the sample side of the link
     */
    private void linkSourceAndSample(Source source, Sample sample) {
        source.getSamples().add(sample);
        sample.getSources().add(source);
    }

    /**
     * Links a sample and an extract in both directions: the extract is added to the sample's extracts and the
     * sample is added to the extract's samples.
     *
     * @param sample the sample side of the link
     * @param extract the extract side of the link
     */
    private void linkSampleAndExtract(Sample sample, Extract extract) {
        sample.getExtracts().add(extract);
        extract.getSamples().add(sample);
    }

    /**
     * Links an extract and a labeled extract in both directions: the labeled extract is added to the
     * extract's labeled extracts and the extract is added to the labeled extract's extracts.
     *
     * @param extract the extract side of the link
     * @param labeledExtract the labeled extract side of the link
     */
    private void linkExtractAndLabeledExtract(Extract extract, LabeledExtract labeledExtract) {
        extract.getLabeledExtracts().add(labeledExtract);
        labeledExtract.getExtracts().add(extract);
    }

    /**
     * Links a labeled extract and a hybridization in both directions: the labeled extract is added to the
     * hybridization's labeled extracts and the hybridization is added to the labeled extract's hybridizations.
     *
     * @param labeledExtract the labeled extract side of the link
     * @param hybridization the hybridization side of the link
     */
    private void linkLabeledExtractAndHybridization(LabeledExtract labeledExtract, Hybridization hybridization) {
        hybridization.getLabeledExtracts().add(labeledExtract);
        labeledExtract.getHybridizations().add(hybridization);
    }

    /**
     * Tells whether a node type denotes a biomaterial, i.e. a source, sample, extract or labeled extract.
     *
     * @param nodeType the SDRF node type to check - must not be null
     * @return true for the four biomaterial node types, false otherwise
     */
    private boolean isBioMaterial(SdrfNodeType nodeType) {
        return nodeType.equals(SdrfNodeType.SOURCE)
                || nodeType.equals(SdrfNodeType.SAMPLE)
                || nodeType.equals(SdrfNodeType.EXTRACT)
                || nodeType.equals(SdrfNodeType.LABELED_EXTRACT);
    }

    /**
     * Finds a file by name among the files of the current import and, when an experiment is set, among the
     * experiment project's imported files whose file type is array data.
     *
     * @param name the file name to look for
     * @return whatever the combined file set returns for that name
     */
    private CaArrayFile getFile(String name) {
        final CaArrayFileSet candidates = new CaArrayFileSet(getFileSet());
        if (this.experiment == null) {
            // Nothing previously imported to consider.
            return candidates.getFile(name);
        }
        final Predicate<CaArrayFile> isArrayData = new Predicate<CaArrayFile>() {
            @Override
            public boolean apply(CaArrayFile f) {
                return f.getFileType().isArrayData();
            }
        };
        candidates.addAll(Collections2.filter(this.experiment.getProject().getImportedFiles(), isArrayData));
        return candidates.getFile(name);
    }

    /**
     * Returns this class's {@code LOG} logger.
     *
     * @return the logger held in {@code LOG}
     */
    @Override
    Logger getLog() {
        return LOG;
    }
}