Java tutorial
/* * MIT License * * Copyright (c) 2016 EPAM Systems * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package com.epam.catgenome.manager.externaldb.ncbi; import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.epam.catgenome.component.MessageHelper; import com.epam.catgenome.constant.MessagesConstants; import com.epam.catgenome.controller.JsonMapper; import com.epam.catgenome.controller.vo.externaldb.NCBIGeneVO; import com.epam.catgenome.controller.vo.externaldb.NCBISummaryVO; import com.epam.catgenome.exception.ExternalDbUnavailableException; import com.epam.catgenome.manager.externaldb.ncbi.parser.NCBIGeneInfoParser; import com.epam.catgenome.manager.externaldb.ncbi.util.NCBIDatabase; import com.epam.catgenome.manager.externaldb.ncbi.util.NCBIUtility; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; /** * <p> * A Manager to get gene data from NCBI * </p> */ @Service public class NCBIGeneManager { private static final String RESULT_PATH = "result"; private JsonMapper mapper = new JsonMapper(); @Autowired private NCBIAuxiliaryManager ncbiAuxiliaryManager; @Autowired private NCBIGeneInfoParser geneInfoParser; private static final int NUMBER_OF_PUBLICATIONS = 5; private static final String UIDS = "uids"; private static final String NCBI_PUBMED_URL = "https://www.ncbi.nlm.nih.gov/pubmed/%s/"; private static final String NCBI_BIOSYSTEM_URL = "https://www.ncbi.nlm.nih.gov/biosystems/" + "%s?Sel=geneid:%s#show=genes"; private static final String NCBI_PUBMED_FULL_URL = "https://www.ncbi.nlm.nih.gov/" + "pubmed?LinkName=gene_pubmed&from_uid=%s"; private static final String NCBI_PUBMED_HOMOLOG_URL = "https://www.ncbi.nlm.nih.gov/pubmed?LinkName=" + "homologene_pubmed&from_uid=%s"; private static final String NCBI_GENE_LINK = "https://www.ncbi.nlm.nih.gov/gene/%s"; /** * Retrieves XML gene info from NCBI's gene database * * @param id gene id * @return NCBIGeneVO * @throws ExternalDbUnavailableException */ public NCBIGeneVO fetchGeneById(final String id) throws ExternalDbUnavailableException { String realID = id; NCBIGeneVO ncbiGeneVO = null; if (StringUtils.isNotBlank(id)) { // if ID contains literals then we consider this external ID and perform search if (!id.matches("\\d+")) { String ncbiId = ncbiAuxiliaryManager.searchDbForId(NCBIDatabase.GENE.name(), realID); realID = StringUtils.isNotBlank(ncbiId) ? ncbiId : id; } String geneInfoXml = ncbiAuxiliaryManager.fetchXmlById(NCBIDatabase.GENE.name(), realID, null); ncbiGeneVO = geneInfoParser.parseGeneInfo(geneInfoXml); ncbiGeneVO.setLinkToCitations(String.format(NCBI_PUBMED_FULL_URL, ncbiGeneVO.getGeneId())); ncbiGeneVO.setGeneLink(String.format(NCBI_GENE_LINK, ncbiGeneVO.getGeneId())); String pubmedQueryXml = ncbiAuxiliaryManager.link(realID, NCBIDatabase.GENE.name(), NCBIDatabase.PUBMED.name(), "gene_pubmed"); Pair<String, String> stringStringPair = geneInfoParser.parseHistoryResponse(pubmedQueryXml, NCBIUtility.NCBI_LINK); String pubmedHistoryQuery = stringStringPair.getLeft(); String pubmedHistoryWebenv = stringStringPair.getRight(); JsonNode pubmedEntries = ncbiAuxiliaryManager.summaryWithHistory(pubmedHistoryQuery, pubmedHistoryWebenv); JsonNode pubmedResultRoot = pubmedEntries.path(RESULT_PATH).path(UIDS); try { parseJsonFromPubmed(pubmedResultRoot, pubmedEntries, ncbiGeneVO); } catch (JsonProcessingException e) { throw new ExternalDbUnavailableException( MessageHelper.getMessage(MessagesConstants.ERROR_NO_RESULT_BY_EXTERNAL_DB), e); } String biosystemsQueryXml = ncbiAuxiliaryManager.link(realID, NCBIDatabase.GENE.name(), NCBIDatabase.BIOSYSTEMS.name(), "gene_biosystems"); Pair<String, String> biosystemsParams = geneInfoParser.parseHistoryResponse(biosystemsQueryXml, NCBIUtility.NCBI_LINK); String biosystemsHistoryQuery = biosystemsParams.getLeft(); String biosystemsHistoryWebenv = biosystemsParams.getRight(); JsonNode biosystemsEntries = ncbiAuxiliaryManager.summaryWithHistory(biosystemsHistoryQuery, biosystemsHistoryWebenv); JsonNode biosystemsResultRoot = biosystemsEntries.path(RESULT_PATH).path(UIDS); try { parseJsonFromBio(biosystemsResultRoot, biosystemsEntries, ncbiGeneVO); } catch (JsonProcessingException e) { throw new ExternalDbUnavailableException( MessageHelper.getMessage(MessagesConstants.ERROR_NO_RESULT_BY_EXTERNAL_DB), e); } String homologsQueryXml = ncbiAuxiliaryManager.link(realID, NCBIDatabase.GENE.name(), NCBIDatabase.HOMOLOGENE.name(), "gene_homologene"); Pair<String, String> homologsParams = geneInfoParser.parseHistoryResponse(homologsQueryXml, NCBIUtility.NCBI_LINK); String homologsQuery = homologsParams.getLeft(); String homologsWebenv = homologsParams.getRight(); JsonNode homologEntries = ncbiAuxiliaryManager.summaryWithHistory(homologsQuery, homologsWebenv); JsonNode homologsResultRoot = homologEntries.path(RESULT_PATH).path(UIDS); try { parseJsonFromHomolog(homologsResultRoot, homologEntries, ncbiGeneVO); } catch (JsonProcessingException e) { throw new ExternalDbUnavailableException( MessageHelper.getMessage(MessagesConstants.ERROR_NO_RESULT_BY_EXTERNAL_DB), e); } } return ncbiGeneVO; } public void setGeneInfoParser(NCBIGeneInfoParser geneInfoParser) { this.geneInfoParser = geneInfoParser; } private void parseJsonFromPubmed(final JsonNode pubmedResultRoot, final JsonNode pubmedEntries, final NCBIGeneVO ncbiGeneVO) throws JsonProcessingException { if (pubmedResultRoot.isArray()) { ncbiGeneVO.setPubNumber(pubmedResultRoot.size()); for (final JsonNode objNode : pubmedResultRoot) { if (ncbiGeneVO.getPubmedReferences().size() >= NUMBER_OF_PUBLICATIONS) { break; } JsonNode jsonNode = pubmedEntries.path(RESULT_PATH).get("" + objNode.asText()); NCBISummaryVO pubmedReference = mapper.treeToValue(jsonNode, NCBISummaryVO.class); pubmedReference.setLink(String.format(NCBI_PUBMED_URL, pubmedReference.getUid())); // take first author if (pubmedReference.getAuthors() != null) { pubmedReference.setMultipleAuthors(pubmedReference.getAuthors().size() > 1); NCBISummaryVO.NCBIAuthor ncbiAuthor = pubmedReference.getAuthors().get(0); pubmedReference.setAuthor(ncbiAuthor); pubmedReference.setAuthors(null); } ncbiGeneVO.getPubmedReferences().add(pubmedReference); } } } private void parseJsonFromBio(final JsonNode biosystemsResultRoot, final JsonNode biosystemsEntries, final NCBIGeneVO ncbiGeneVO) throws JsonProcessingException { if (biosystemsResultRoot.isArray()) { ncbiGeneVO.setPathwaysNumber(biosystemsResultRoot.size()); List<NCBISummaryVO> pathways = new ArrayList<>(biosystemsResultRoot.size()); for (final JsonNode objNode : biosystemsResultRoot) { JsonNode jsonNode = biosystemsEntries.path(RESULT_PATH).get("" + objNode.asText()); NCBISummaryVO biosystemsReference = mapper.treeToValue(jsonNode, NCBISummaryVO.class); biosystemsReference.setLink( String.format(NCBI_BIOSYSTEM_URL, biosystemsReference.getUid(), ncbiGeneVO.getGeneId())); pathways.add(biosystemsReference); } ncbiGeneVO.setBiosystemsReferences(pathways.stream() .sorted(Comparator.comparing(summary -> summary.getBiosystem().getBiosystemname())) .collect(Collectors.toList())); } } private void parseJsonFromHomolog(JsonNode homologsResultRoot, JsonNode homologEntries, NCBIGeneVO ncbiGeneVO) throws JsonProcessingException { if (homologsResultRoot.isArray() && homologsResultRoot.size() != 0) { final JsonNode objNode = homologsResultRoot.get(0); JsonNode jsonNode = homologEntries.path(RESULT_PATH).get("" + objNode.asText()); NCBISummaryVO summary = mapper.treeToValue(jsonNode, NCBISummaryVO.class); ncbiGeneVO.setLinkToHomologsCitations(String.format(NCBI_PUBMED_HOMOLOG_URL, summary.getUid())); } } }