com.b2international.snowowl.snomed.importer.rf2.validation.SnomedTaxonomyValidator.java Source code

Java tutorial

Introduction

Here is the source code for com.b2international.snowowl.snomed.importer.rf2.validation.SnomedTaxonomyValidator.java

Source

/*
 * Copyright 2011-2018 B2i Healthcare Pte Ltd, http://b2i.sg
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.b2international.snowowl.snomed.importer.rf2.validation;

import static com.b2international.snowowl.snomed.common.ContentSubType.SNAPSHOT;
import static java.util.Collections.emptySet;
import static java.util.Collections.singleton;
import static java.util.stream.Collectors.toList;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

import javax.annotation.Nullable;

import org.slf4j.Logger;

import com.b2international.collections.longs.LongCollection;
import com.b2international.snowowl.datastore.server.snomed.index.init.Rf2BasedSnomedTaxonomyBuilder;
import com.b2international.snowowl.snomed.SnomedConstants.Concepts;
import com.b2international.snowowl.snomed.datastore.taxonomy.InvalidRelationship;
import com.b2international.snowowl.snomed.datastore.taxonomy.InvalidRelationship.MissingConcept;
import com.b2international.snowowl.snomed.datastore.taxonomy.SnomedTaxonomyBuilder;
import com.b2international.snowowl.snomed.datastore.taxonomy.SnomedTaxonomyStatus;
import com.b2international.snowowl.snomed.importer.net4j.DefectType;
import com.b2international.snowowl.snomed.importer.net4j.ImportConfiguration;
import com.b2international.snowowl.snomed.importer.net4j.SnomedIncompleteTaxonomyValidationDefect;
import com.b2international.snowowl.snomed.importer.net4j.SnomedValidationDefect;
import com.b2international.snowowl.snomed.importer.rf2.RepositoryState;
import com.b2international.snowowl.snomed.importer.rf2.util.Rf2FileModifier;
import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Multimap;

/**
 * Validates the taxonomy of active concepts and active IS_A relationships for a single
 * characteristic type (stated or inferred).
 * <p>
 * The taxonomy is seeded from the concept IDs and statements supplied by the
 * {@link RepositoryState}, then the concept and relationship RF2 files of the
 * {@link ImportConfiguration} are applied on top of it.
 */
public class SnomedTaxonomyValidator {

    private static final Logger LOGGER = getLogger(SnomedTaxonomyValidator.class);

    private static final String MESSAGE_WITH_EFFECTIVE_TIME = "'%s' relationship refers to an inactive concept '%s' (%s) in effective time '%s'";
    private static final String MESSAGE_WITHOUT_EFFECTIVE_TIME = "'%s' relationship refers to an inactive concept '%s' (%s)";

    /**
     * Orders effective time keys by natural string order, except that the empty key
     * is considered greater than any non-empty key and therefore sorts last.
     */
    private static final Comparator<String> EFFECTIVE_TIME_COMPARATOR = (left, right) -> {
        if (left.isEmpty() != right.isEmpty()) {
            // exactly one side is empty: the empty one is the greater
            return left.isEmpty() ? 1 : -1;
        }
        return left.compareTo(right);
    };

    // new RF2 state
    private final File conceptsFile;
    private final File relationshipsFile;
    private final boolean snapshot;
    // current store state
    private final String characteristicType;
    private final LongCollection conceptIds;
    private final Collection<String[]> statements;

    /**
     * Creates a validator for the given characteristic type.
     *
     * @param configuration supplies the content sub type, the concept file and the stated or
     *        inferred relationship file to validate
     * @param repositoryState supplies the concept IDs and the stated or inferred statements
     *        the taxonomy is seeded with
     * @param characteristicType either {@link Concepts#STATED_RELATIONSHIP} or
     *        {@link Concepts#INFERRED_RELATIONSHIP}
     * @throws IllegalArgumentException if the characteristic type is neither stated nor inferred
     */
    public SnomedTaxonomyValidator(final ImportConfiguration configuration, final RepositoryState repositoryState,
            final String characteristicType) {
        this.characteristicType = characteristicType;
        this.snapshot = SNAPSHOT.equals(configuration.getContentSubType());
        this.conceptsFile = configuration.getConceptFile();
        this.conceptIds = repositoryState.getConceptIds();
        this.statements = Concepts.INFERRED_RELATIONSHIP.equals(characteristicType)
                ? repositoryState.getInferredStatements()
                : repositoryState.getStatedStatements();

        if (Concepts.STATED_RELATIONSHIP.equals(characteristicType)) {
            relationshipsFile = configuration.getStatedRelationshipFile();
        } else if (Concepts.INFERRED_RELATIONSHIP.equals(characteristicType)) {
            relationshipsFile = configuration.getRelationshipFile();
        } else {
            throw new IllegalArgumentException("Collection mode " + characteristicType + " is not allowed.");
        }
    }

    /**
     * Schematically validates the RF2 file by building a taxonomy between the concepts
     * and the active IS_A relationships.
     * <p>
     * Nothing is validated (and an empty collection is returned) when neither a concept
     * file nor a relationship file is set.
     *
     * @return an empty collection if there were no validation errors, otherwise a collection
     *         of {@link SnomedValidationDefect defects} representing the problems
     */
    public Collection<SnomedValidationDefect> validate() {
        if (canValidate()) {
            return doValidate();
        }
        return Collections.emptySet();
    }

    /*
     * Runs the taxonomy check and converts its outcome into defects:
     *  - an I/O failure becomes a single IO_PROBLEM defect without messages,
     *  - invalid relationships become a single incomplete taxonomy defect carrying one
     *    message per invalid relationship,
     *  - otherwise no defect is reported.
     */
    private Collection<SnomedValidationDefect> doValidate() {

        final Multimap<String, InvalidRelationship> invalidRelationships;

        try {
            invalidRelationships = processTaxonomy();
        } catch (final IOException e) {
            LOGGER.error("Validation failed.", e);
            return singleton(new SnomedValidationDefect(getDefectFileName(), DefectType.IO_PROBLEM,
                    Collections.<String>emptySet()));
        }

        if (invalidRelationships.isEmpty()) {
            LOGGER.info("{} SNOMED CT ontology validation successfully finished. No errors were found.",
                    getCharacteristicTypeLabel());
            return emptySet();
        }

        final List<String> validationMessages = invalidRelationships.asMap().entrySet().stream()
                .flatMap(entry -> entry.getValue().stream()
                        .map(relationship -> toValidationMessage(entry.getKey(), relationship)))
                .collect(toList());

        LOGGER.info("{} SNOMED CT ontology validation successfully finished. {} taxonomy {} identified.",
                getCharacteristicTypeLabel(),
                validationMessages.size(), validationMessages.size() > 1 ? "issues were" : "issue was");

        return singleton(new SnomedIncompleteTaxonomyValidationDefect(getDefectFileName(), validationMessages));
    }

    /*
     * Renders the message for a single invalid relationship. The effective time is only
     * mentioned when the key under which the relationship was collected is non-empty.
     */
    private static String toValidationMessage(final String effectiveTime, final InvalidRelationship relationship) {
        final String relationshipId = String.valueOf(relationship.getRelationshipId());

        final String missingReference = MissingConcept.DESTINATION == relationship.getMissingConcept()
                ? String.valueOf(relationship.getDestinationId())
                : String.valueOf(relationship.getSourceId());

        final String missingReferenceLabel = relationship.getMissingConcept().getLabel();

        if (Strings.isNullOrEmpty(effectiveTime)) {
            return String.format(MESSAGE_WITHOUT_EFFECTIVE_TIME, relationshipId, missingReference,
                    missingReferenceLabel);
        }

        return String.format(MESSAGE_WITH_EFFECTIVE_TIME, relationshipId, missingReference,
                missingReferenceLabel, effectiveTime);
    }

    /*
     * Name of the file the defects are reported against. The relationship file is used
     * whenever it is set; it can be null when only a concept file is validated, in which
     * case the concept file name is used instead of failing with a NullPointerException.
     */
    private String getDefectFileName() {
        if (null != relationshipsFile) {
            return relationshipsFile.getName();
        }
        return null != conceptsFile ? conceptsFile.getName() : "";
    }

    /* Label used in log messages for the characteristic type being validated. */
    private String getCharacteristicTypeLabel() {
        return Concepts.STATED_RELATIONSHIP.equals(characteristicType) ? "Stated" : "Inferred";
    }

    /*
     * Two major use cases exist:
     *  |
     *  + - snapshot import
     *  | |
     *  | +-> apply the concept and relationship files (when set) in one step, build the
     *  |     taxonomy once and collect the invalid relationships under the empty key.
     *  |
     *  + - full or delta import
     *    |
     *    +-> split the files (when set) by effective time, then for every effective time in
     *        ascending order apply the matching slices, rebuild the taxonomy and collect
     *        the invalid relationships under that effective time.
     *
     * Returns the invalid relationships keyed by effective time; empty if none were found.
     */
    private Multimap<String, InvalidRelationship> processTaxonomy() throws IOException {
        final Rf2BasedSnomedTaxonomyBuilder builder = Rf2BasedSnomedTaxonomyBuilder
                .newInstance(new SnomedTaxonomyBuilder(conceptIds, statements), characteristicType);
        final Multimap<String, InvalidRelationship> invalidRelationships = ArrayListMultimap.create();

        LOGGER.info("Validating SNOMED CT ontology based on the given RF2 release files...");

        if (snapshot) {

            if (hasConceptImport()) {
                builder.applyNodeChanges(removeConceptHeader());
            }

            if (hasRelationshipImport()) {
                builder.applyEdgeChanges(removeRelationshipHeader());
            }

            collectInvalidRelationships("", builder.build(), invalidRelationships);
            return invalidRelationships;
        }

        final Map<String, File> conceptFiles = hasConceptImport()
                ? Rf2FileModifier.split(conceptsFile)
                : ImmutableMap.<String, File>of();
        final Map<String, File> relationshipFiles = hasRelationshipImport()
                ? Rf2FileModifier.split(relationshipsFile)
                : ImmutableMap.<String, File>of();

        final List<String> effectiveTimes = ImmutableSortedSet.orderedBy(EFFECTIVE_TIME_COMPARATOR)
                .addAll(conceptFiles.keySet()).addAll(relationshipFiles.keySet()).build().asList();

        for (final String effectiveTime : effectiveTimes) {
            LOGGER.info("Validating taxonomy in '{}'...", effectiveTime);

            // either slice may be missing for a given effective time; a null path is passed then
            builder.applyNodeChanges(getFilePath(conceptFiles.get(effectiveTime)));
            builder.applyEdgeChanges(getFilePath(relationshipFiles.get(effectiveTime)));

            collectInvalidRelationships(effectiveTime, builder.build(), invalidRelationships);
        }

        return invalidRelationships;
    }

    /* Records the invalid relationships of a non-OK build result under the given key. */
    private static void collectInvalidRelationships(final String effectiveTime, final SnomedTaxonomyStatus result,
            final Multimap<String, InvalidRelationship> invalidRelationships) {
        if (!result.getStatus().isOK()) {
            invalidRelationships.putAll(effectiveTime, result.getInvalidRelationships());
        }
    }

    /* Returns the path of the given file, or null if the file itself is null. */
    private String getFilePath(@Nullable final File file) {
        return null == file ? null : file.getPath();
    }

    /* Path of the file returned by Rf2FileModifier.removeHeader for the concept file. */
    private String removeConceptHeader() throws IOException {
        return Rf2FileModifier.removeHeader(conceptsFile).getPath();
    }

    /* Path of the file returned by Rf2FileModifier.removeHeader for the relationship file. */
    private String removeRelationshipHeader() throws IOException {
        return Rf2FileModifier.removeHeader(relationshipsFile).getPath();
    }

    /* Validation needs at least one of the concept and relationship files. */
    private boolean canValidate() {
        return hasConceptImport() || hasRelationshipImport();
    }

    /* A concept file counts as set when it is non-null and has a non-empty path. */
    private boolean hasConceptImport() {
        return null != conceptsFile && !conceptsFile.getPath().isEmpty();
    }

    /* A relationship file counts as set when it is non-null and has a non-empty path. */
    private boolean hasRelationshipImport() {
        return null != relationshipsFile && !relationshipsFile.getPath().isEmpty();
    }
}