org.phenotips.data.internal.OmimInformationContentPatientScorer.java Source code

Java tutorial

Introduction

Here is the source code for org.phenotips.data.internal.OmimInformationContentPatientScorer.java

Source

/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses/
 */
package org.phenotips.data.internal;

import org.phenotips.data.Feature;
import org.phenotips.data.Patient;
import org.phenotips.data.PatientScorer;
import org.phenotips.data.PatientSpecificity;
import org.phenotips.vocabulary.Vocabulary;
import org.phenotips.vocabulary.VocabularyTerm;

import org.xwiki.component.annotation.Component;
import org.xwiki.component.phase.Initializable;
import org.xwiki.component.phase.InitializationException;

import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Locale;
import java.util.Set;
import java.util.TimeZone;

import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

/**
 * Patient scorer that computes a local score based on the information content provided by the selected positive and
 * negative features with regards to identifying OMIM disorders.
 *
 * @version $Id: 53c6b86c0c2fdca3e217faaf8b4b36f69438f658 $
 * @since 1.0M12
 */
@Component
@Named("omimInformationContent")
@Singleton
public class OmimInformationContentPatientScorer implements PatientScorer, Initializable {
    /** The OMIM field that is queried when counting the disorders selected by a phenotype. */
    private static final String SEARCH_FOR = "symptom";

    /** The phenotype identifier used for counting all the disorders reachable through phenotypes. */
    private static final String ROOT_PHENOTYPE = "HP:0000001";

    /** The name under which the computed specificity is reported. */
    private static final String SCORER_NAME = "local-omim";

    /**
     * Bounds the walk towards the ancestors of a term that isn't represented in OMIM: the walk stops as soon as the
     * step counter reaches this value. It is also the divisor used for computing the boost given for each step.
     */
    private static final int ANCESTOR_STEP_LIMIT = 5;

    /** Provides access to the OMIM ontology, where the information content is checked. */
    @Inject
    @Named("omim")
    private Vocabulary omim;

    /** The HPO ontology, needed for accessing the ancestors of a term that might not be present in OMIM. */
    @Inject
    @Named("hpo")
    private Vocabulary hpo;

    /** The total information present in OMIM that is reachable through phenotypes. */
    private double totalTerms;

    /**
     * Caches the number of OMIM entries selected by {@link #ROOT_PHENOTYPE}, which is later used as the denominator
     * when computing the information content of individual features.
     *
     * @throws InitializationException declared by the {@link Initializable} contract; not thrown directly by this
     *             implementation
     */
    @Override
    public void initialize() throws InitializationException {
        this.totalTerms = countSelectedDisorders(ROOT_PHENOTYPE);
    }

    @Override
    public PatientSpecificity getSpecificity(Patient patient) {
        double score = getScore(patient);
        return new PatientSpecificity(score, now(), SCORER_NAME);
    }

    /**
     * Computes the score of a patient by summing the information content of the positive and negative features, with
     * negative features weighing half as much as the positive ones, and squashing the result with an arctangent so
     * that it grows towards, but never reaches, {@code 1}.
     *
     * @param patient the patient profile to score
     * @return the computed score, {@code 0} if the patient has no features with an identifier
     */
    @Override
    public double getScore(Patient patient) {
        Pair<Double, Integer> symptomsScore = process(patient, true);
        Pair<Double, Integer> negativeSymptomsScore = process(patient, false);
        double score = 0;

        if (symptomsScore.getRight() + negativeSymptomsScore.getRight() > 0) {
            score = 2 * Math.atan(symptomsScore.getLeft() / 10 + negativeSymptomsScore.getLeft() / 20) / Math.PI;
        }
        return score;
    }

    /**
     * Compute the information content of a patient's positive or negative symptoms.
     *
     * @param p the patient profile to score
     * @param presentFeatures whether the score for positive ({@code true}) or negative ({@code false}) features is
     *            computed
     * @return the score (information content) and the number of features
     */
    private Pair<Double, Integer> process(Patient p, boolean presentFeatures) {
        double score = 0;
        int count = 0;
        for (Feature f : p.getFeatures()) {
            // Features without an identifier (free text) cannot be looked up, so they are skipped
            if (StringUtils.isNotEmpty(f.getId()) && f.isPresent() == presentFeatures) {
                score += informationContent(f);
                count++;
            }
        }
        return new ImmutablePair<Double, Integer>(score, count);
    }

    /**
     * How much information is captured by a feature? In other words, how many diseases are selected by a feature out of
     * the total selectable diseases. If a feature doesn't select any diseases at all, the information content of its
     * nearest represented ancestor is considered, with a slight boost for even more specificity: the result is
     * increased by {@code 1 / ANCESTOR_STEP_LIMIT} of its value for each step taken towards that ancestor.
     *
     * @param f the target feature to measure
     * @return the information content captured by this term, {@code 0} if neither the term nor any of the inspected
     *         ancestors selects any disease
     */
    private double informationContent(Feature f) {
        String toSearch = f.getId();
        double ic = informationContent(countSelectedDisorders(toSearch));
        int i = 0;

        // Only the first parent is followed at each step; the counter is incremented before the limit is checked
        while (ic == 0 && ++i < ANCESTOR_STEP_LIMIT) {
            VocabularyTerm term = this.hpo.getTerm(toSearch);
            if (term == null) {
                break;
            }
            Set<VocabularyTerm> parents = term.getParents();
            if (parents.isEmpty()) {
                break;
            }
            toSearch = parents.iterator().next().getId();
            ic = informationContent(countSelectedDisorders(toSearch));
        }
        // The division must be performed in floating point, otherwise the boost is always truncated to zero
        return ic * (1 + i / (double) ANCESTOR_STEP_LIMIT);
    }

    /**
     * How much information is contained in {@code n} terms out of the whole ontology?
     *
     * @param n the number of selected terms
     * @return the information content captured by the selected terms, in bits; {@code 0} if nothing is selected or if
     *         the total number of terms is not known
     */
    private double informationContent(long n) {
        // An empty or unavailable index leaves the total at zero, which would otherwise result in an infinite value
        if (n <= 0 || this.totalTerms <= 0) {
            return 0;
        }
        return -Math.log(n / this.totalTerms) / Math.log(2);
    }

    /**
     * Counts the OMIM entries selected by a phenotype.
     *
     * @param phenotypeId the identifier of the phenotype to search for
     * @return the number of matching entries, as reported by the OMIM vocabulary
     */
    private long countSelectedDisorders(String phenotypeId) {
        return this.omim.count(Collections.singletonMap(SEARCH_FOR, phenotypeId));
    }

    /**
     * Provides the current moment, obtained from a calendar set to the UTC timezone.
     *
     * @return the current date
     */
    private Date now() {
        return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT).getTime();
    }
}