Java tutorial
/* * The Fascinator - ReDBox/Mint SRU Client * Copyright (C) 2012 Queensland Cyber Infrastructure Foundation (http://www.qcif.edu.au/) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package com.googlecode.fascinator.redbox.sru; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentFactory; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import de.sub.goobi.helper.HttpClientHelper; /** * <p> * A light-weight SRU client implementation. Originally written for the purpose of searching the National Library of Australia's Party Infrastructure * Project (PIP) via GET (ie. not POST or SOAP, both of which PIP also supports). * </p> * <p> * More information/documentation for PIP is <a href="https://wiki.nla.gov.au/display/ARDCPIP/Documentation">available on the NLA wiki</a>. * </p> * * @author Greg Pendlebury * * <p> * Credit for some of inspiration has to go to another light-weight implementation available under LGPL we looked at before we started coding: * <a href="http://code.google.com/p/sinciput/source/browse/trunk/sinciput/src/com/technosophos/sinciput/sru/SRUClient.java">SRUClient</a> * from 'Sinciput'. * </p> * */ public class SRUClient { /** Logging **/ private static Logger log = LoggerFactory.getLogger(SRUClient.class); /** A SAX Reader for XML parsing **/ private SAXReader saxReader; /** Namespaces for XML parsing **/ private Map<String, String> namespaces; /** Default URL is for the NLA **/ private String baseUrl = "http://www.nla.gov.au/apps/srw/search/peopleaustralia"; /** Default Schema is for EAC-CPF records from the NLA **/ private String recordSchema = "urn:isbn:1-931666-33-4"; /** Version parameter for the query **/ private String sruVersion = "1.1"; /** Request a particular response packing **/ private String responsePacking = "xml"; /** Unit testing only. Fake search response **/ private String testingResponseString; private String maximumRecords = "2"; /** * <p> * Default Constructor. Connect to the NLA unless otherwise instructed. This will rely on the more complicated constructor defaulting to the * searching for EAC-CPF records as well. * </p> * */ public SRUClient() { saxInit(); } /** * <p> * Constructor indicating the base URL for the SRU interface. * </p> * * @param baseUrl The Base URL for the SRU interface. Required. * @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4') * @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed. */ public SRUClient(String baseUrl) throws MalformedURLException { this(baseUrl, null, null, null); } /** * <p> * Constructor indicating the base URL and metadata schema. * </p> * * @param baseUrl The Base URL for the SRU interface. Required. * @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4') * @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed. */ public SRUClient(String baseUrl, String schema) throws MalformedURLException { this(baseUrl, schema, null, null); } /** * <p> * Constructor indicating the base URL, metadata schema and format packing for responses. * </p> * * @param baseUrl The Base URL for the SRU interface. Required. * @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4') * @param packing The SRU 'recordPacking' to use. NULL values will default to 'xml' * @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed. */ public SRUClient(String baseUrl, String schema, String packing) throws MalformedURLException { this(baseUrl, schema, packing, null); } /** * <p> * This constructor is where the real work happens. All the constructors above provide wrappers of this one based on how much you want to deviate * from the defaults (which assume you are connecting to the NLA. * </p> * * @param baseUrl The Base URL for the SRU interface. Required. * @param version The SRU 'version' to use. NULL values will default to v1.1 * @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4') * @param packing The SRU 'recordPacking' to use. NULL values will default to 'xml' * @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed. */ public SRUClient(String baseUrl, String schema, String packing, String version) throws MalformedURLException { // Make sure our URL is valid first try { @SuppressWarnings("unused") URL url = new URL(baseUrl); this.baseUrl = baseUrl; } catch (MalformedURLException ex) { log.error("Invalid URL passed to constructor: ", ex); throw ex; } // Start with the default NLA parameters if nothing has been configured // NLA = EAC-CPF if (schema != null) { recordSchema = schema; } // NLA = 1.1 if (version != null) { sruVersion = version; } // NLA = xml if (packing != null) { responsePacking = packing; } // http://sru.gbv.de/gvk?version=1.1&recordSchema=picaxml&operation=searchRetrieve&query=pica.ppn%3D380872331&maximumRecords=10 // http://sru.gbv.de/gvk?version=1.1&recordSchema=picaxml&operation=searchRetrieve&query=pica.ppn%3D380872331 saxInit(); } /** * <p> * Used to change the 'recordSchema' after instantiation. All outgoing requests sent after this call will use the new schema. * </p> * * @param newSchema The new schema to use. */ public void setRecordSchema(String newSchema) { recordSchema = newSchema; } /** * <p> * Used to change the 'version' after instantiation. All outgoing requests sent after this call will use the new version. * </p> * * @param newVersion The new version to use. */ public void setVersion(String newVersion) { sruVersion = newVersion; } /** * <p> * Used to change 'recordPacking' after instantiation. All outgoing requests sent after this call will use the new format. * </p> * * @param newPacking The new packing format to use. */ public void setPacking(String newPacking) { responsePacking = newPacking; } /** * <p> * Simple init for the SAX Reader. * </p> * */ private void saxInit() { namespaces = new HashMap<>(); DocumentFactory docFactory = new DocumentFactory(); docFactory.setXPathNamespaceURIs(namespaces); saxReader = new SAXReader(docFactory); } /** * <p> * Used in unit testing to indicate a package resource to use as search responses, rather then submitting a real SRU query. * </p> * * @param fileName The name of a resource 'file' to use as simulated search result. * @throws IOException If encoding/access issues occur accessing the resource. */ public void testResponseResource(String fileName) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); IOUtils.copy(getClass().getResourceAsStream("/" + fileName), out); testingResponseString = out.toString("UTF-8"); } /** * Parse an XML document from a string * * @param xmlData The String to parse * @return Document The parsed XML Object. Null if any problems occur. */ public Document parseXml(String xmlData) { try { byte[] bytes = xmlData.getBytes("utf-8"); ByteArrayInputStream in = new ByteArrayInputStream(bytes); return saxReader.read(in); } catch (UnsupportedEncodingException ex) { log.error("Input is not UTF-8", ex); return null; } catch (DocumentException ex) { log.error("Failed to parse XML", ex); return null; } } /** * <p> * Parse an XML String response and populate a response Object. * </p> * * @param xmlData The XML String returned from the search * @return SRUResponse An instantiated response object */ public SRUResponse getResponseObject(String xmlData) { // Parsing Document xmlResponse = parseXml(xmlData); if (xmlResponse == null) { log.error("Can't get results after XML parsing failed."); return null; } // Processing SRUResponse response = null; try { response = new SRUResponse(xmlResponse); } catch (SRUException ex) { log.error("Error processing XML response:", ex); } return response; } /** * <p> * Parse an XML String response and get a List Object containing all of the SRU search results. * </p> * * @param xmlData The XML String returned from the search * @return List<Node> A List containing a DOM4J Node for each search result */ public List<Node> getResultList(String xmlData) { SRUResponse response = getResponseObject(xmlData); if (response == null) { log.error("Unable to get results from response XML."); return null; } return response.getResults(); } /** * <p> * Basic wrapper for safely encoding Strings used in URLs. * </p> * * @param value The String to be used in the URL * @return String A safely encoded version of 'value' for use in URLs. */ private String encode(String value) { try { return URLEncoder.encode(value, "UTF-8"); } catch (UnsupportedEncodingException ex) { log.error("Error UTF-8 encoding value '{}'", value, ex); return ""; } } /** * <p> * Generate a basic search URL for this SRU interface. * </p> * * @param query The query String to perform against the SRU interface. * @return String A URL that can be retrieved to execute this search. */ public String generateSearchUrl(String query) { return this.generateSearchUrl(query, null, null, null, null); } /** * <p> * Generate a search URL for this SRU interface. No sorting or pagination. * </p> * * @param query The query String to perform against the SRU interface. Required. * @param operation The 'operation' perform. If null this will default to 'searchRetrieve'. * @return String A URL that can be retrieved to execute this search. */ public String generateSearchUrl(String query, String operation) { return this.generateSearchUrl(query, operation, null, null, null); } /** * <p> * Generate a search URL for this SRU interface. No pagination. * </p> * * @param query The query String to perform against the SRU interface. Required. * @param operation The 'operation' perform. If null this will default to 'searchRetrieve'. * @param sortKeys Sorting. Optional, with no default. * @return String A URL that can be retrieved to execute this search. */ public String generateSearchUrl(String query, String operation, String sortKeys) { return this.generateSearchUrl(query, operation, sortKeys, null, null); } /** * <p> * Generate a search URL for this SRU interface. This is the actual implementation method wrapped by the methods above with most parameters as * optional. * </p> * * @param query The query String to perform against the SRU interface. Required. * @param operation The 'operation' perform. If null this will default to 'searchRetrieve'. * @param sortKeys Sorting. Optional, with no default. * @param startRecord Starting record number. Optional, with no default. * @param maxRecords Maximum rows to return. Optional, with no default. * @return String A URL that can be retrieved to execute this search. */ public String generateSearchUrl(String query, String operation, String sortKeys, String startRecord, String maxRecords) { String searchUrl = baseUrl; if (query == null) { log.error("Cannot generate a search URL without a search! 'query' parameter is required."); return null; } if (operation == null) { operation = "searchRetrieve"; } // URL basics if (searchUrl.contains("?")) { searchUrl += "&version=" + encode(sruVersion); } else { searchUrl += "?version=" + encode(sruVersion); } searchUrl += "&recordSchema=" + encode(recordSchema); searchUrl += "&recordPacking=" + encode(responsePacking); // Search basics searchUrl += "&operation=" + encode(operation); searchUrl += "&query=" + encode(query); // Optional extras on search. Sorting and pagination if (sortKeys != null) { searchUrl += "&sortKeys=" + encode(sortKeys); } if (startRecord != null) { searchUrl += "&startRecord=" + encode(startRecord); } if (maximumRecords != null) { searchUrl += "&maximumRecords=" + encode(maximumRecords); } return searchUrl; } /** * <p> * Perform a basic search and return the response body. * </p> * * @param query The query String to perform against the SRU interface. * @return String The response body return from the SRU interface. */ public String getSearchResponse(String query) { return getSearchResponse(query, null, null, null, null); } /** * <p> * Perform a search and return the response body. No sorting or pagination. * </p> * * @param query The query String to perform against the SRU interface. Required. * @param operation The 'operation' perform. If null this will default to 'searchRetrieve'. * @return String The response body return from the SRU interface. */ public String getSearchResponse(String query, String operation) { return getSearchResponse(query, operation, null, null, null); } /** * <p> * Perform a search and return the response body. No pagination. * </p> * * @param query The query String to perform against the SRU interface. Required. * @param operation The 'operation' perform. If null this will default to 'searchRetrieve'. * @param sortKeys Sorting. Optional, with no default. * @return String The response body return from the SRU interface. */ public String getSearchResponse(String query, String operation, String sortKeys) { return getSearchResponse(query, operation, sortKeys, null, null); } /** * <p> * Perform a search and return the response body. This is the actual implementation method wrapped by the methods above with most parameters as * optional. * </p> * * @param query The query String to perform against the SRU interface. Required. * @param operation The 'operation' perform. If null this will default to 'searchRetrieve'. * @param sortKeys Sorting. Optional, with no default. * @param startRecord Starting record number. Optional, with no default. * @param maxRecords Maximum rows to return. Optional, with no default. * @return String The response body return from the SRU interface. */ public String getSearchResponse(String query, String operation, String sortKeys, String startRecord, String maxRecords) { // Get a search URL to execute first String searchUrl = generateSearchUrl(query, operation, sortKeys, startRecord, maxRecords); if (searchUrl == null) { log.error("Invalid search URL. Cannot perform search."); return null; } // Unit testing... don't perform a real search if (testingResponseString != null) { return testingResponseString; } // Perform the search String response = null; CloseableHttpClient client = HttpClientBuilder.create().build(); HttpGet method = new HttpGet(searchUrl); try { response = client.execute(method, HttpClientHelper.stringResponseHandler); } catch (IOException e) { log.error("Error during search: ", e); } finally { method.releaseConnection(); if (client != null) { try { client.close(); } catch (IOException e) { log.error("Error during search"); } } } return response; } /** * <p> * Make sure that the SAX Reader is aware of the XML namespaces used by the NLA when parsing their. * </p> * */ private void nlaNamespaces() { if (!namespaces.containsKey("srw")) { namespaces.put("srw", "http://www.loc.gov/zing/srw/"); } if (!namespaces.containsKey("eac")) { namespaces.put("eac", "urn:isbn:1-931666-33-4"); } } /** * <p> * Search for a record from the National Library of Australia with the provided identifier. If multiple records match this identifier only the * first will be returned. * </p> * * @param id The identifier to search for * @return String The record matching this identifier. Null if not found */ private Node nlaGetRecordNodeById(String id) { nlaNamespaces(); // Run a search String query = "rec.identifier=\"" + id + "\""; String rawXml = getSearchResponse(query); // Get the results nodes List<Node> results = getResultList(rawXml); if (results.isEmpty()) { log.warn("This identifier matches no records."); return null; } if (results.size() > 1) { log.warn("This identifier matches multiple records! Returning only the first."); } // Return first(only?) record if ("xml".equals(responsePacking)) { return results.get(0).selectSingleNode("*[1]"); } else { return results.get(0); } } /** * <p> * Search for a record from the National Library of Australia with the provided identifier. If multiple records match this identifier only the * first will be returned. * </p> * * @param id The identifier to search for * @return String The record matching this identifier. Null if not found */ public String nlaGetRecordById(String id) { Node node = nlaGetRecordNodeById(id); if (node == null) { return null; } if ("xml".equals(responsePacking)) { return node.asXML(); } else { return node.getText(); } } /** * <p> * Search for a record from the National Library of Australia with the provided identifier. If multiple records match this identifier only the * first will be returned. * </p> * * @param id The identifier to search for * @return String The record matching this identifier. Null if not found */ public String nlaGetNationalId(String id) { Node node = nlaGetRecordNodeById(id); if (node == null) { return null; } @SuppressWarnings("unchecked") List<Node> otherIds = node.selectNodes("eac:control/eac:otherRecordId"); for (Node idNode : otherIds) { String otherId = idNode.getText(); if (otherId.startsWith("http://nla.gov.au")) { return otherId; } } return null; } /** * <p> * Search for a record from the National Library of Australia with the provided identifier. Process and return their Identity record. * </p> * * @param id The identifier to search for * @return NLAIdentity A processed Identity * @throws SRUException If processing the Identity fails. */ public NLAIdentity nlaGetIdentityById(String id) throws SRUException { Node node = nlaGetRecordNodeById(id); return new NLAIdentity(node); } /** * <p> * Search for a records from the National Library of Australia and parse the resultant XML is a wrapper object. * </p> * * @param search The search to submit to the NLA * @return SRUResponse A parsed response */ public SRUResponse nlaGetResponseBySearch(String search) { return nlaGetResponseBySearch(search, null, null); } /** * <p> * Search for a records from the National Library of Australia and parse the resultant XML is a wrapper object. * </p> * * @param search The search to submit to the NLA * @param startRecord Starting record number. Optional, with no default. * @param maxRecords Maximum rows to return. Optional, with no default. * @return SRUResponse A parsed response */ public SRUResponse nlaGetResponseBySearch(String search, String startRecord, String maxRecords) { nlaNamespaces(); // Search NLA String xmlResponse = getSearchResponse(search, null, null, startRecord, maxRecords); if (xmlResponse == null) { log.error("Searching NLA failed!"); return null; } // Parse results return getResponseObject(xmlResponse); } /** * <p> * Search for records from the National Library of Australia. Process and return their Identity records. It is important to note that if any * Identity fails to process it will not appear in the List. * </p> * * @param search The search to submit to the NLA * @return List<NLAIdentity> A list of processed Identities */ public List<NLAIdentity> nlaGetIdentitiesBySearch(String search) { return nlaGetIdentitiesBySearch(search, null, null); } /** * <p> * Search for records from the National Library of Australia. Process and return their Identity records. It is important to note that if any * Identity fails to process it will not appear in the List. * </p> * * @param search The search to submit to the NLA * @param startRecord Starting record number. Optional, with no default. * @param maxRecords Maximum rows to return. Optional, with no default. * @return List<NLAIdentity> A list of processed Identities */ public List<NLAIdentity> nlaGetIdentitiesBySearch(String search, String startRecord, String maxRecords) { SRUResponse response = nlaGetResponseBySearch(search); if (response == null) { log.error("Searching NLA failed!"); return null; } // Process Identities return NLAIdentity.convertNodesToIdentities(response.getResults()); } public String getMaximumRecords() { return maximumRecords; } public void setMaximumRecords(String maximumRecords) { this.maximumRecords = maximumRecords; } }