// Java tutorial
/* * The Gemma project * * Copyright (c) 2006 University of British Columbia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package ubic.gemma.core.loader.entrez.pubmed; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.junit.After; import org.junit.Before; import org.junit.Test; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.Keyword; import ubic.gemma.model.common.description.MedicalSubjectHeading; import ubic.gemma.model.expression.biomaterial.Compound; import java.io.InputStream; import java.text.SimpleDateFormat; import java.util.Collection; import java.util.Date; import java.util.zip.GZIPInputStream; import static org.junit.Assert.*; /** * @author pavlidis */ public class PubMedXMLParserTest { private static final Log log = LogFactory.getLog(PubMedXMLParserTest.class.getName()); private InputStream testStream; private PubMedXMLParser testParser; @Before public void setUp() { testParser = new PubMedXMLParser(); } @After public void tearDown() throws Exception { testStream.close(); testParser = null; testStream = null; } @Test public void testParse() { try { testStream = PubMedXMLParserTest.class.getResourceAsStream("/data/pubmed-test.xml"); Collection<BibliographicReference> brl = testParser.parse(testStream); BibliographicReference br = brl.iterator().next(); assertEquals("Lee, Homin K; Hsu, Amy K; Sajdak, Jon; Qin, Jie; Pavlidis, Paul", br.getAuthorList()); 
assertNotNull(br.getAbstractText()); assertEquals("Genome Res", br.getPublication()); assertEquals("15173114", br.getPubAccession().getAccession()); assertEquals("Coexpression analysis of human genes across many microarray data sets.", br.getTitle()); assertNotNull(br.getVolume()); assertNotNull(br.getPages()); SimpleDateFormat f = new SimpleDateFormat("mm/HH/MM/dd/yyyy"); assertEquals("00/00/06/01/2004", f.format(br.getPublicationDate())); } catch (RuntimeException e) { this.logOrThrowException(e); } } @Test public void testParseBook() { try { testStream = PubMedXMLParserTest.class.getResourceAsStream("/data/pubmed-fullbook.xml"); Collection<BibliographicReference> brl = testParser.parse(testStream); BibliographicReference br = brl.iterator().next(); assertNotNull(br); assertEquals("21796826", br.getPubAccession().getAccession()); assertEquals("Field, Marilyn J; Boat, Thomas F", br.getEditor()); assertEquals( "Institute of Medicine (US) Committee on Accelerating Rare Diseases Research and Orphan Product Development", br.getAuthorList()); assertEquals("Rare Diseases and Orphan Products: Accelerating Research and Development", br.getPublication()); assertEquals("Rare Diseases and Orphan Products: Accelerating Research and Development", br.getTitle()); SimpleDateFormat f = new SimpleDateFormat("yyyy"); Date publicationDate = br.getPublicationDate(); assertNotNull(publicationDate); assertEquals("2010", f.format(publicationDate)); assertTrue(br.getAbstractText() .startsWith("This Institute of Medicine (IOM) study grew out of discussions")); assertTrue(br.getAbstractText().endsWith("interested general public.")); } catch (RuntimeException e) { this.logOrThrowException(e); } } /* * Test uses 2030131 */ @Test public void testParseBookArticle() { try { testStream = PubMedXMLParserTest.class.getResourceAsStream("/data/pubmed-bookarticle.xml"); Collection<BibliographicReference> brl = testParser.parse(testStream); BibliographicReference br = brl.iterator().next(); 
assertNotNull(br); assertEquals("Pagon, Roberta A; Bird, Thomas D; Dolan, Cynthia R; Stephens, Karen", br.getEditor()); assertEquals("Kuhlenbaumer, Gregor; Timmerman, Vincent", br.getAuthorList()); assertEquals("GeneReviews", br.getPublication()); assertEquals("Giant Axonal Neuropathy", br.getTitle()); SimpleDateFormat f = new SimpleDateFormat("yyyy"); Date publicationDate = br.getPublicationDate(); assertNotNull(publicationDate); assertEquals("2003", f.format(publicationDate)); assertTrue(br.getAbstractText() .startsWith("Giant axonal neuropathy (GAN) is characterized by a severe early-onset")); assertTrue(br.getAbstractText().endsWith( "offering custom prenatal testing if the disease-causing mutations in a family are known.")); } catch (RuntimeException e) { this.logOrThrowException(e); } } @Test public void testParseMesh() { try { testStream = PubMedXMLParserTest.class.getResourceAsStream("/data/pubmed-mesh-test.xml"); Collection<BibliographicReference> brl = testParser.parse(testStream); BibliographicReference br = brl.iterator().next(); Collection<MedicalSubjectHeading> meshTerms = br.getMeshTerms(); assertEquals(16, meshTerms.size()); // for ( MedicalSubjectHeading heading : meshTerms ) { // log.info( heading.getTerm() + " " + heading.getIsMajorTopic() ); // for ( MedicalSubjectHeading q : heading.getQualifiers() ) { // log.info( " qualifier: " + q.getTerm() + " " + q.getIsMajorTopic() ); // } // } } catch (RuntimeException e) { this.logOrThrowException(e); } } /* * This uses a medline-format file, instead of the pubmed xml files we get from the eutils. 
*/ @Test public void testParseMulti() throws Exception { try { testStream = new GZIPInputStream( PubMedXMLParserTest.class.getResourceAsStream("/data/loader/medline.multi.xml.gz")); Collection<BibliographicReference> brl = testParser.parse(testStream); assertEquals(147, brl.size()); int expectedNumberofKeywords = 258; int expectedNumberofCompounds = 46; int actualNumberofKeywords = 0; int actualNumberofCompounds = 0; for (BibliographicReference reference : brl) { assertNotNull(reference.getPublicationDate()); Collection<Keyword> keywords = reference.getKeywords(); for (Keyword keyword : keywords) { assertNotNull(keyword.getTerm()); // log.info( keyword.getTerm() ); actualNumberofKeywords++; } for (Compound c : reference.getChemicals()) { assertNotNull(c.getName()); // log.info( c.getName() ); actualNumberofCompounds++; } // assertTrue( reference.getPublicationTypes().size() > 0 ); } assertEquals(expectedNumberofKeywords, actualNumberofKeywords); assertEquals(expectedNumberofCompounds, actualNumberofCompounds); } catch (RuntimeException e) { this.logOrThrowException(e); } } @Test public void testParseMultipartAbstract() { try { testStream = PubMedXMLParserTest.class.getResourceAsStream("/data/pubmed-mpabs.xml"); Collection<BibliographicReference> brl = testParser.parse(testStream); BibliographicReference br = brl.iterator().next(); assertNotNull(br.getAbstractText()); assertTrue(br.getAbstractText().startsWith("PURPOSE: To dete")); assertTrue(br.getAbstractText() .contains("METHODS: RGCs of Brown Norway rats were retrogradely labeled bilaterally with the " + "fluorescent dye 4-(4-(dihexadecylamino)styryl)-N")); assertTrue(br.getAbstractText() .contains("CONCLUSIONS: The SLO is useful for in vivo imaging of rat RGCs")); PubMedXMLParserTest.log.info(br.getAbstractText()); } catch (RuntimeException e) { this.logOrThrowException(e); } } /* * PMID 7914452 is an example of a retracted article. 
*/ @Test public void testParseRetracted() { try { testStream = PubMedXMLParserTest.class.getResourceAsStream("/data/pubmed-retracted.xml"); Collection<BibliographicReference> brl = testParser.parse(testStream); BibliographicReference br = brl.iterator().next(); assertNotNull(br.getAbstractText()); assertEquals( "Retracted [In: Garey CE, Schwarzman AL, Rise ML, Seyfried TN. Nat Genet. 1995 Sep;11(1):104 PMID=7550304]", br.getDescription()); assertTrue(br.getRetracted()); } catch (RuntimeException e) { this.logOrThrowException(e); } } private void logOrThrowException(RuntimeException e) { if (e.getCause() instanceof java.net.ConnectException) { PubMedXMLParserTest.log.warn("Test skipped due to connection exception"); } else if (e.getCause() instanceof java.net.UnknownHostException) { PubMedXMLParserTest.log.warn("Test skipped due to unknown host exception"); } else { throw (e); } } }