Java tutorial
/******************************************************************************* * Copyright (C) 2008 Global Biodiversity Information Facility Secretariat. * All Rights Reserved. * * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. ******************************************************************************/ package org.gbif.harvest.digir; import org.gbif.harvest.constants.ProtocolTypeEnum; import org.gbif.harvest.core.Constants; import org.gbif.harvest.exception.HarvesterException; import org.gbif.harvest.exception.OperationStoppedException; import org.gbif.harvest.exception.WrappedSaxException; import org.gbif.harvest.log.I18nLog; import org.gbif.harvest.log.I18nLogFactory; import org.gbif.harvest.testutil.HarvesterTestUtils; import org.gbif.harvest.util.DataQualityUtils; import org.gbif.harvest.util.FileUtils; import org.gbif.harvest.util.GbifLogger; import org.gbif.harvest.util.RequestUtils; import org.gbif.harvest.util.TemplateUtils; import org.gbif.harvest.writers.RequestResponseWriterManager; import org.gbif.harvest.xml.DigesterUtils; import java.io.File; import java.io.IOException; import java.net.SocketException; import com.google.common.io.Files; import org.apache.commons.io.filefilter.PrefixFileFilter; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import static junit.framework.Assert.assertEquals; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class DigirHarvesterTest { protected I18nLog log = I18nLogFactory.getLog(this.getClass()); private DigirHarvester harvester; private File targetDirectory; private GbifLogger gbifLogger; private DataQualityUtils dataQualityUtils; private FileUtils fileUtils; // Modify the test destination, resource, and directory for testing different DAP // inventory response contains blank line and line with invalid chars to begin public static final String DIRECTORY = "academy of natural sciences _digir.acnatsci.org_/herpetology"; public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); public static final String TEST_DESTINATION = "http://digir.ansp.org/digir/DiGIR.php"; public static final String TEST_RESOURCE = "Herpetology"; public static final String TEST_PROTOCOL = "manis_digir_1_0"; public static final String TEST_MAPPING_FILE = "indexMapping_manis_dwc_1_0"; public static final String TEST_MAX_INVENTORY_RESPONSE = "1000"; // maxInventoryResponse public static final String TEST_MAX_SEARCH_RESPONSE = "200"; // maxSearchResponse public static final String TEST_MIN_QUERY_TERM_LENGTH = "3"; public static final int TEST_TARGET_COUNT = 5; // for testing duplicate name (some with trailing whitespace) // public static final String DIRECTORY = "compare/herp"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://digir.mcz.harvard.edu/digir/DiGIR.php"; // // public static final String TEST_RESOURCE = "mczherp"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "1000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse /* * // inventory response contains names unidentified and unidentifiable * public static final String DIRECTORY = "arctos _arctos.database.museum_/msb_bird"; * public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); * public static final String TEST_DESTINATION = "http://arctos.database.museum/digir/DiGIR.php"; * public static final String TEST_RESOURCE = "msb_bird"; * public static final String TEST_PROTOCOL = "manis_digir_1_0"; * public static final String TEST_MAPPING_FILE = "indexMapping_manis_dwc_1_0"; */ /* * // inventory response contains ranges composed of only single names * public static final String DIRECTORY = "australian antarctic data centre _data.aad.gov.au_/argos_tracking"; * public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); * public static final String TEST_DESTINATION = "http://data.aad.gov.au/digir/digir.php"; * public static final String TEST_RESOURCE = "argos_tracking"; * public static final String TEST_PROTOCOL = "digir_1_0"; * public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; * public static final String TEST_MAX_INVENTORY_RESPONSE = "200"; //maxInventoryResponse * public static final String TEST_MAX_SEARCH_RESPONSE = "10"; //maxSearchResponse */ // inventory response contains ranges composed of only single names // public static final String DIRECTORY = "arctos _arctos.database.museum_/kwp_ento"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://arctos.database.museum/digir/DiGIR.php"; // public static final String TEST_RESOURCE = "kwp_ento"; // public static final String TEST_PROTOCOL = "manis_digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_manis_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "1000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse /* * public static final String DIRECTORY = "missouri botanical garden _mo_ _digir.mobot.org_/mobot"; * public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); * public static final String TEST_DESTINATION = "http://digir.mobot.org/digir/DiGIR.php"; * public static final String TEST_RESOURCE = "MOBOT"; * public static final String TEST_PROTOCOL = "manis_digir_1_0"; * public static final String TEST_MAPPING_FILE = "indexMapping_manis_dwc_1_0"; * public static final String TEST_MAX_INVENTORY_RESPONSE = "200"; //maxInventoryResponse * public static final String TEST_MAX_SEARCH_RESPONSE = "900"; //maxSearchResponse */ /* * public static final String DIRECTORY = "aad/kelp"; * public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); * public static final String TEST_DESTINATION = "http://data.aad.gov.au:80/digir/digir.php"; * public static final String TEST_RESOURCE = "kelp"; * public static final String TEST_PROTOCOL = "digir_1_0"; * public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; * public static final String TEST_MAX_INVENTORY_RESPONSE = "200"; //maxInventoryResponse * public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; //maxSearchResponse */ // public static final String DIRECTORY = "gbif-spain _taray.csic.es_/mcnc-art"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://taray.csic.es:6000/digir/DiGIR.php"; // public static final String TEST_RESOURCE = "MCNC-Art"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "900"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "10000"; // maxSearchResponse // public static final String DIRECTORY = "mnhn/gicim"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://dsibib.mnhn.fr/ici/digir"; // public static final String TEST_RESOURCE = "gicim"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "900"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "10000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "1"; // minQueryTermLength // metadata response back on inventory // http://srpollinator.cria.org.br/provider/test/eg_search.php // public static final String DIRECTORY = "iabin/ce_ufpe"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // // public static final String TEST_DESTINATION = "http://portalsplink.cria.org.br/prov_pollinator/DiGIR.php"; // // public static final String TEST_DESTINATION = "http://srpollinator.cria.org.br/provider/DiGIR.php"; // public static final String TEST_DESTINATION = "http://srpollinator.cria.org.br:80/provider/DiGIR.php"; // public static final String TEST_RESOURCE = "CE-UFPE"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "1000000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "0"; // minQueryTermLength // // endless loop on inventory response // public static final String DIRECTORY = "scar/gicim"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://w2.scarmarbin.be/digir2/digir.php"; // public static final String TEST_RESOURCE = "gicim"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "10000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "0"; // minQueryTermLength // // // endless search: emptry responses // public static final String DIRECTORY = "korea/rsr9acf5b936e8db4707496fd0b229eae78"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://203.250.196.222:8080/KNA/DigirProvider/index_html"; // public static final String TEST_RESOURCE = "rsr9acf5b936e8db4707496fd0b229eae78"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "1000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "3"; // minQueryTermLength // // name smaller than minQueryTermLength // public static final String DIRECTORY = "ny/ny"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://digir.nybg.org:1234/digir/DiGIR.php"; // public static final String TEST_RESOURCE = "NY"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "10000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "3"; // minQueryTermLength // endless looping because no end of records determined // public static final String DIRECTORY = "arizona/endless"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://sod84.asu.edu/digir/DiGIR.php"; // public static final String TEST_RESOURCE = "ASULichens"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "10000"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "3"; // minQueryTermLength // public static final String DIRECTORY = "usda/plants"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://plantsws.nrcs.usda.gov/digir/DiGIR.php"; // public static final String TEST_RESOURCE = "plants"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "900"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "3"; // minQueryTermLength // paging did not work, end of records not parsed correctly // public static final String DIRECTORY = "danbif/bad"; // public static final String TEST_DIRECTORY = Constants.BASE_DIR.concat(File.separator).concat(DIRECTORY); // public static final String TEST_DESTINATION = "http://pythonprovider.danbif.dk/DigirProvider/index_html"; // public static final String TEST_RESOURCE = "rsrbdf789c3a6adf0ced1039ba4960afa31"; // public static final String TEST_PROTOCOL = "digir_1_0"; // public static final String TEST_MAPPING_FILE = "indexMapping_dwc_1_0"; // public static final String TEST_MAX_INVENTORY_RESPONSE = "900"; // maxInventoryResponse // public static final String TEST_MAX_SEARCH_RESPONSE = "1000"; // maxSearchResponse // public static final String TEST_MIN_QUERY_TERM_LENGTH = "3"; // minQueryTermLength // currently not used public static final String DATE_LAST_UPDATED = "2009-01-14T20:30:11-09:00"; // dateLastUpdated // name of concept to use to inventory public static final String CONCEPT = "ScientificName"; /** * Test method for {@link org.gbif.harvest.biocase.BiocaseHarvester#search(String, String, String, String, String, * String, Boolean, String, int)}. Difference from test3Harvest() being that this mocks the RequestUtils execute * method always returning a WrappedSaxException instead of a Diagnostics object. */ @Test public void testHarvestWithMockSAXException() throws IOException, OperationStoppedException, HarvesterException { // Mocked SAXException RequestUtils requestUtils = mock(RequestUtils.class); when(requestUtils.executeGetRequestAndReturnDiagnostics(anyString(), any(ProtocolTypeEnum.class), any(RequestResponseWriterManager.class))) .thenThrow(new WrappedSaxException(new SAXException("Mocked SAXException"))); harvester = new DigirHarvester(new TemplateUtils(), fileUtils, requestUtils, new DigesterUtils(fileUtils), gbifLogger); // runs through all name ranges, doesn't just terminate first time it encounters a SAXException harvester.search(TEST_RESOURCE, TEST_DESTINATION, targetDirectory.getAbsolutePath(), TEST_PROTOCOL, TEST_MAX_SEARCH_RESPONSE, TEST_TARGET_COUNT); // collect search request files (there were 2 ranges, so there are 2 search request) String[] files = targetDirectory.list(new PrefixFileFilter(Constants.SEARCH_REQUEST_FILENAME)); assertEquals(2, files.length); // there were no search response files because the exception was thrown each time files = targetDirectory.list(new PrefixFileFilter(Constants.SEARCH_RESPONSE_FILENAME)); assertEquals(0, files.length); log.info("Harvest was successful. Please check the folder " + TEST_DIRECTORY); } /** * Test method for {@link org.gbif.harvest.biocase.BiocaseHarvester#search(String, String, String, String, String, * String, Boolean, String, int)}. Difference from test3Harvest() being that this mocks the RequestUtils execute * method always returning a WrappedSaxException instead of a Diagnostics object. */ @Test public void testHarvestWithMockSAXParseException() throws IOException, OperationStoppedException, HarvesterException { // Mocked SAXException RequestUtils requestUtils = mock(RequestUtils.class); when(requestUtils.executeGetRequestAndReturnDiagnostics(anyString(), any(ProtocolTypeEnum.class), any(RequestResponseWriterManager.class))) // (String message, String publicId, String systemId, int lineNumber, int columnNumber) .thenThrow(new WrappedSaxException(new SAXParseException("msg", "pubId", "sysId", 1, 1))); harvester = new DigirHarvester(new TemplateUtils(), fileUtils, requestUtils, new DigesterUtils(fileUtils), gbifLogger); // runs through all name ranges, doesn't just terminate first time it encounters a SAXParseException harvester.search(TEST_RESOURCE, TEST_DESTINATION, targetDirectory.getAbsolutePath(), TEST_PROTOCOL, TEST_MAX_SEARCH_RESPONSE, TEST_TARGET_COUNT); // collect search request files (there were 2 ranges, 4 request per increment up to max target count size (5)) String[] files = targetDirectory.list(new PrefixFileFilter(Constants.SEARCH_REQUEST_FILENAME)); assertEquals(48, files.length); // there were no search response files because the exception was thrown each time files = targetDirectory.list(new PrefixFileFilter(Constants.SEARCH_RESPONSE_FILENAME)); assertEquals(0, files.length); log.info("Harvest was successful. Please check the folder " + TEST_DIRECTORY); } @Test(expected = HarvesterException.class) public void testHarvestWithMockNullDiagnostics() throws IOException, OperationStoppedException, HarvesterException { // Mocked SAXException RequestUtils requestUtils = mock(RequestUtils.class); when(requestUtils.executeGetRequestAndReturnDiagnostics(anyString(), any(ProtocolTypeEnum.class), any(RequestResponseWriterManager.class))) // (String message, String publicId, String systemId, int lineNumber, int columnNumber) .thenReturn(null); harvester = new DigirHarvester(new TemplateUtils(), fileUtils, requestUtils, new DigesterUtils(fileUtils), gbifLogger); harvester.search(TEST_RESOURCE, TEST_DESTINATION, targetDirectory.getAbsolutePath(), TEST_PROTOCOL, TEST_MAX_SEARCH_RESPONSE, TEST_TARGET_COUNT); } @Test(expected = HarvesterException.class) public void testHarvestWithOperationStoppedException() throws IOException, OperationStoppedException, HarvesterException { // Mocked SAXException RequestUtils requestUtils = mock(RequestUtils.class); when(requestUtils.executeGetRequestAndReturnDiagnostics(anyString(), any(ProtocolTypeEnum.class), any(RequestResponseWriterManager.class))) // (String message, String publicId, String systemId, int lineNumber, int columnNumber) .thenThrow(new OperationStoppedException("Thread was manually terminated")); harvester = new DigirHarvester(new TemplateUtils(), fileUtils, requestUtils, new DigesterUtils(fileUtils), gbifLogger); harvester.search(TEST_RESOURCE, TEST_DESTINATION, targetDirectory.getAbsolutePath(), TEST_PROTOCOL, TEST_MAX_SEARCH_RESPONSE, TEST_TARGET_COUNT); } /** * Test method for {@link org.gbif.harvest.biocase.BiocaseHarvester#search(String, String, String, String, String, * String, Boolean, String, int)}. Difference from test3Harvest() being that this mocks the RequestUtils execute * method always returning a SocketException instead of a Diagnostics object. */ @Ignore("Because it waits for 3 minutes after each exception") public void testHarvestWithSocketException() throws IOException, OperationStoppedException, HarvesterException { // Mocked SAXException RequestUtils requestUtils = mock(RequestUtils.class); when(requestUtils.executeGetRequestAndReturnDiagnostics(anyString(), any(ProtocolTypeEnum.class), any(RequestResponseWriterManager.class))).thenThrow(new SocketException("Socket Exception")); harvester = new DigirHarvester(new TemplateUtils(), fileUtils, requestUtils, new DigesterUtils(fileUtils), gbifLogger); // runs through all name ranges, doesn't just terminate first time it encounters a SAXException harvester.search(TEST_RESOURCE, TEST_DESTINATION, targetDirectory.getAbsolutePath(), TEST_PROTOCOL, TEST_MAX_SEARCH_RESPONSE, TEST_TARGET_COUNT); // collect search request files (so initial + 5 retries = 6 max) String[] files = targetDirectory.list(new PrefixFileFilter(Constants.SEARCH_REQUEST_FILENAME)); assertEquals(6, files.length); // there were no search response files because the exception was thrown each time files = targetDirectory.list(new PrefixFileFilter(Constants.SEARCH_RESPONSE_FILENAME)); assertEquals(0, files.length); } @Before public void setUp() throws IOException { // set up target directory targetDirectory = Files.createTempDir(); // Override setUp() gbifLogger = new GbifLogger(); dataQualityUtils = new DataQualityUtils(gbifLogger); fileUtils = new FileUtils(dataQualityUtils); // create and populate name ranges file needed by diferent search tests HarvesterTestUtils.prepareForSearch(targetDirectory, fileUtils); } @Ignore("Only used for live harvesting") public void setUpToRunThroughLiveHarvest() throws IOException, OperationStoppedException { GbifLogger gbifLogger = new GbifLogger(); DataQualityUtils dataQualityUtils = new DataQualityUtils(gbifLogger); FileUtils fileUtils = new FileUtils(dataQualityUtils); RequestUtils requestUtils = new RequestUtils(fileUtils, gbifLogger, "UTF-8", 360000, 6000, 1000, 5, 500); harvester = new DigirHarvester(new TemplateUtils(), fileUtils, requestUtils, new DigesterUtils(fileUtils), gbifLogger); } @Ignore("Only used for live harvesting") public void test1Inventory() { try { harvester.inventory(TEST_DESTINATION, CONCEPT, TEST_DIRECTORY, TEST_RESOURCE, TEST_PROTOCOL, TEST_MAX_INVENTORY_RESPONSE); log.info("DigirHarvester.inventory() succeeded. Please check: " + TEST_DIRECTORY); } catch (HarvesterException e) { log.error("DigirHarvester.inventory() failed: " + e.getMessage(), e); } } @Ignore("Only used for live harvesting") public void test2ProcessInventoried() { try { harvester.processInventoried(TEST_DIRECTORY, TEST_MAX_SEARCH_RESPONSE, TEST_MIN_QUERY_TERM_LENGTH); log.info("DigirHarvester.processInventoried() succeeded. Please check: " + TEST_DIRECTORY); } catch (Exception e) { log.error("DigirHarvester.processInventoried() failed: " + e.getMessage(), e); } } @Ignore("Only used for live harvesting") public void test4Process() { try { harvester.processHarvested(TEST_DIRECTORY, TEST_PROTOCOL, TEST_MAPPING_FILE); log.info("DigirHarvester.process() succeeded. Please check: " + TEST_DIRECTORY); } catch (Exception e) { log.error("DigirHarvester.search() failed: " + e.getMessage(), e); } } @Ignore("Only used for live harvesting") public void test3Search() { try { harvester.search(TEST_RESOURCE, TEST_DESTINATION, TEST_DIRECTORY, TEST_PROTOCOL, TEST_MAX_SEARCH_RESPONSE, TEST_TARGET_COUNT); log.info("DigirHarvester.search() succeeded. Please check: " + TEST_DIRECTORY); } catch (HarvesterException e) { log.error("DigirHarvester.search() failed: " + e.getMessage(), e); } } }