Java tutorial
/******************************************************************************* * Australian National University Metadata Stores * Copyright (C) 2013 The Australian National University * * This file is part of Australian National University Metadata Stores. * * Australian National University Metadata Stores is free software: you * can redistribute it and/or modify it under the terms of the GNU * General Public License as published by the Free Software Foundation, * either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package au.edu.anu.metadatastores.harvester; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.net.ConnectException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.TimeZone; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.hibernate.Query; import org.hibernate.Session; import org.openarchives.oai._2.OAIPMHtype; import org.openarchives.oai._2.RecordType; import org.openarchives.oai._2.StatusType; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import ORG.oclc.oai.harvester2.verb.ListRecords;
import au.edu.anu.metadatastores.datamodel.harvester.HarvestContent;
import au.edu.anu.metadatastores.datamodel.harvester.Location;

/**
 * <p>Harvest</p>
 *
 * <p>The Australian National University</p>
 *
 * <p>Class that harvests data from designated locations</p>
 *
 * <p>A harvest issues a ListRecords request against the location's URL, follows
 * resumption tokens until none is returned, saves every returned record as a
 * {@link HarvestContent} and finally stores the new last-harvest date on the
 * {@link Location}.</p>
 *
 * @author Genevieve Turner
 *
 */
public class Harvest {
    static final Logger LOGGER = LoggerFactory.getLogger(Harvest.class);

    /** Date pattern used for the from/until arguments of the ListRecords request (formatted in GMT). */
    private static final String HARVEST_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    /** Number of saved records after which the Hibernate session is flushed and cleared. */
    private static final int BATCH_SIZE = 20;

    /**
     * Starter class for harvesting
     *
     * @param args The first argument is the system to harvest; any further arguments are ignored
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            LOGGER.error("No Location Specified");
            return;
        }
        Harvest harvest = new Harvest();
        try {
            harvest.harvest(args[0]);
        } catch (HarvestException e) {
            LOGGER.error("Exception harvesting content", e);
        }
    }

    /**
     * Constructor
     */
    public Harvest() {
    }

    /**
     * Harvest the records from the given system
     *
     * <p>Looks up the {@link Location} whose system matches the given value and
     * harvests from it.</p>
     *
     * @param harvestSystem The string of the system to harvest records for
     * @throws HarvestException If no location exists for the system or the harvest fails
     */
    public void harvest(String harvestSystem) throws HarvestException {
        Session session = HarvesterHibernateUtil.getSessionFactory().openSession();
        Location location = null;
        try {
            Query query = session.createQuery("FROM Location WHERE system = :system");
            query.setParameter("system", harvestSystem);
            location = (Location) query.uniqueResult();
        } finally {
            session.close();
        }

        // Fail when the lookup found nothing; only a location that exists can be harvested.
        if (location == null) {
            throw new HarvestException("No location found for the system: " + harvestSystem);
        }
        harvest(location);
    }

    /**
     * Harvest the records from the given location
     *
     * <p>When the location has a last harvest date, only the window between that
     * date and the start of this harvest is requested; otherwise no from/until
     * restriction is sent. Harvesting stops at the first response that contains
     * errors (they are logged by {@link #processErrors(NodeList)}).</p>
     *
     * @param location The location to harvest from
     * @throws HarvestException If the location is null, the remote system cannot be
     *         reached, or the response cannot be read or parsed
     */
    public void harvest(Location location) throws HarvestException {
        if (location == null) {
            throw new HarvestException("No location provided to harvest from");
        }
        LOGGER.info("Begin Harvest");

        Date lastHarvestDate = location.getLastHarvestDate();
        Date now = new Date();

        // SimpleDateFormat is not thread-safe, so a fresh instance is created per harvest.
        SimpleDateFormat sdf = new SimpleDateFormat(HARVEST_DATE_PATTERN);
        sdf.setTimeZone(TimeZone.getTimeZone("GMT"));

        String from = null;
        String until = null;
        if (lastHarvestDate != null) {
            from = sdf.format(lastHarvestDate);
            until = sdf.format(now);
            LOGGER.info("From: {}, Until: {}", from, until);
        }
        // Record the same instant that was used as 'until', so the next harvest
        // starts exactly where this one ended and no window is skipped.
        location.setLastHarvestDate(now);

        try {
            ListRecords listRecords = new ListRecords(location.getUrl(), from, until, null,
                    location.getMetadataPrefix());
            while (listRecords != null) {
                NodeList errors = listRecords.getErrors();
                if (errors != null && errors.getLength() > 0) {
                    processErrors(errors);
                    break;
                }

                List<HarvestContent> harvestContents = processList(listRecords.getDocument(),
                        location.getSystem());
                saveList(harvestContents);

                // An absent or empty resumption token marks the last page of results.
                String resumptionToken = listRecords.getResumptionToken();
                if (resumptionToken == null || resumptionToken.length() == 0) {
                    listRecords = null;
                } else {
                    listRecords = new ListRecords(location.getUrl(), resumptionToken);
                }
            }
            // NOTE(review): the location is updated even when the loop stopped on an
            // error response - confirm that advancing the harvest date is intended then.
            updateLocation(location);
        } catch (TransformerException e) {
            throw new HarvestException(e.getMessage(), e);
        } catch (SAXException e) {
            throw new HarvestException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new HarvestException(e.getMessage(), e);
        } catch (ConnectException e) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.error("Exception ", e);
            }
            // Keep the cause so callers can still see why the connection failed.
            throw new HarvestException("Unable to connect to " + location.getUrl(), e);
        } catch (IOException e) {
            throw new HarvestException(e.getMessage(), e);
        } catch (NoSuchFieldException e) {
            throw new HarvestException(e.getMessage(), e);
        }
        LOGGER.info("Harvest Complete");
    }

    /**
     * Process harvesting errors and log errors appropriately
     *
     * <p>An error with the code "noRecordsMatch" is logged at debug level; every
     * other error is logged at error level with its code and text. Nodes that are
     * not elements are ignored.</p>
     *
     * @param errors The errors that occurred while harvesting
     */
    public void processErrors(NodeList errors) {
        for (int i = 0; i < errors.getLength(); i++) {
            Node error = errors.item(i);
            if (error.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element element = (Element) error;
            String errorCode = element.getAttribute("code");
            if ("noRecordsMatch".equals(errorCode)) {
                LOGGER.debug("No records found");
            } else {
                LOGGER.error("Error harvesting records - {} - {}", errorCode, element.getTextContent());
            }
        }
    }

    /**
     * Update the location information
     *
     * <p>Merges the location in its own session and transaction. If the merge or
     * commit fails the transaction is rolled back and the failure is rethrown.</p>
     *
     * @param location The location information to update
     */
    public void updateLocation(Location location) {
        Session session = HarvesterHibernateUtil.getSessionFactory().openSession();
        try {
            session.beginTransaction();
            try {
                session.merge(location);
                session.getTransaction().commit();
            } catch (RuntimeException e) {
                rollbackQuietly(session);
                throw e;
            }
        } finally {
            session.close();
        }
    }

    /**
     * Process the list of records that have been harvested
     *
     * <p>Each record becomes one {@link HarvestContent}: deleted records get the
     * content "deleted", records with metadata get the serialised metadata element,
     * and records with neither are logged and added without content. If the
     * document cannot be unmarshalled the error is logged and the records gathered
     * so far (possibly none) are returned.</p>
     *
     * @param listRecords The list of records to process
     * @param system The system that has been harvested from
     * @return The list of harvested content
     */
    private List<HarvestContent> processList(Node listRecords, String system) {
        List<HarvestContent> harvestContents = new ArrayList<HarvestContent>();
        try {
            JAXBContext jaxbContext = JAXBContext.newInstance(OAIPMHtype.class);
            Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
            JAXBElement<OAIPMHtype> element = unmarshaller.unmarshal(listRecords, OAIPMHtype.class);
            OAIPMHtype oaipmh = element.getValue();
            if (oaipmh.getListRecords() == null) {
                // A response without a ListRecords element has nothing to process.
                return harvestContents;
            }

            List<RecordType> records = oaipmh.getListRecords().getRecord();
            for (RecordType pmhRecord : records) {
                HarvestContent harvestContent = new HarvestContent();
                harvestContent.setSystem(system);

                String identifier = pmhRecord.getHeader().getIdentifier();
                LOGGER.debug("Identifier: {}", identifier);
                harvestContent.setIdentifier(identifier);

                if (StatusType.DELETED.equals(pmhRecord.getHeader().getStatus())) {
                    harvestContent.setContent("deleted");
                } else if (pmhRecord.getMetadata() != null) {
                    StringWriter sw = new StringWriter();
                    streamNode((Element) pmhRecord.getMetadata().getAny(), sw);
                    harvestContent.setContent(sw.toString());
                } else {
                    LOGGER.error("Record does not have the status of deleted and has no metadata");
                    LOGGER.info("Status: {}", pmhRecord.getHeader().getStatus());
                }
                harvestContents.add(harvestContent);
            }
        } catch (JAXBException e) {
            LOGGER.error("Exception performing unmarshal", e);
        }
        return harvestContents;
    }

    /**
     * Save the list of harvested content
     *
     * <p>All records are saved in a single transaction. The session is flushed and
     * cleared after every {@link #BATCH_SIZE} records. If saving or committing
     * fails the transaction is rolled back and the failure is rethrown.</p>
     *
     * @param harvestContents The list of harvested content to save
     */
    private void saveList(List<HarvestContent> harvestContents) {
        Session session = HarvesterHibernateUtil.getSessionFactory().openSession();
        try {
            session.beginTransaction();
            try {
                for (int i = 0; i < harvestContents.size(); i++) {
                    session.save(harvestContents.get(i));
                    // Flush once a full batch has been saved (after the 20th, 40th, ... record).
                    if ((i + 1) % BATCH_SIZE == 0) {
                        session.flush();
                        session.clear();
                    }
                }
                session.getTransaction().commit();
            } catch (RuntimeException e) {
                rollbackQuietly(session);
                throw e;
            }
        } finally {
            session.close();
        }
    }

    /**
     * Roll back the current transaction of the session, logging instead of
     * throwing if the rollback itself fails, so the original failure is the one
     * that reaches the caller.
     *
     * @param session The session whose transaction should be rolled back
     */
    private void rollbackQuietly(Session session) {
        try {
            session.getTransaction().rollback();
        } catch (RuntimeException rollbackFailure) {
            LOGGER.error("Unable to roll back transaction", rollbackFailure);
        }
    }

    /**
     * Put the node into a stream
     *
     * @param node The node to put into a stream
     * @param writer The writer for the stream
     */
    private void streamNode(Node node, Writer writer) {
        streamNode(node, new StreamResult(writer));
    }

    /**
     * Write the node to the given stream
     *
     * <p>The node is written without an XML declaration and with indentation
     * requested. A transformation failure is logged and not propagated, so the
     * result may be left empty or incomplete.</p>
     *
     * @param node The node to write into the stream
     * @param result The stream to write to
     */
    private void streamNode(Node node, StreamResult result) {
        try {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.transform(new DOMSource(node), result);
        } catch (TransformerException e) {
            // Deliberately best-effort: report through the logger rather than stderr.
            LOGGER.error("Exception writing node to stream", e);
        }
    }
}