Java tutorial
package de.intranda.goobi.plugins.sru; /*************************************************************** * Copyright notice * * (c) 2016 Robert Sehr <robert.sehr@intranda.com> * * All rights reserved * * This file is part of the Goobi project. The Goobi project is free software; * you can redistribute it and/or modify it under the terms of the GNU General * Public License as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * The GNU General Public License can be found at * http://www.gnu.org/copyleft/gpl.html. * * This script is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * This copyright notice MUST APPEAR in all copies of this file! ***************************************************************/ import java.io.IOException; import java.io.StringReader; import java.net.MalformedURLException; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.goobi.production.plugin.interfaces.IOpacPlugin; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.Namespace; import org.jdom2.input.SAXBuilder; import org.jdom2.input.sax.XMLReaders; import org.w3c.dom.Node; import org.w3c.dom.Text; import com.googlecode.fascinator.redbox.sru.SRUClient; import de.intranda.goobi.plugins.GbvMarcSruImport; //import de.intranda.goobi.plugins.SwbMarcSruImport; import de.intranda.ugh.extension.MarcFileformat; import de.sub.goobi.helper.Helper; import ugh.dl.DigitalDocument; import ugh.dl.DocStruct; import ugh.dl.DocStructType; import ugh.dl.Fileformat; import ugh.dl.Prefs; import ugh.exceptions.PreferencesException; import ugh.exceptions.ReadException; import 
ugh.exceptions.TypeNotAllowedForParentException; import ugh.fileformats.mets.XStream;

/**
 * Static helpers for querying a catalogue via SRU and converting the returned MARC XML into a W3C DOM
 * {@code <collection>} of {@code <record>} elements, which can then be read into a UGH {@link Fileformat}.
 */
public class SRUHelper {

    private static final Namespace SRW = Namespace.getNamespace("srw", "http://www.loc.gov/zing/srw/");
    private static Namespace MARC = Namespace.getNamespace("marc", "http://www.loc.gov/MARC21/slim");

    /**
     * Replaces the namespace used to look up and create MARC elements.
     *
     * @param marc the namespace to use from now on
     */
    public static void setMarcNamespace(Namespace marc) {
        MARC = marc;
    }

    /**
     * Runs a single {@code searchField=searchValue} query against the given catalogue.
     *
     * @param catalogue   catalogue address handed to the {@link SRUClient}
     * @param schema      record schema handed to the {@link SRUClient}
     * @param searchField name of the index to search in
     * @param searchValue value to search for
     * @param packing     record packing handed to the {@link SRUClient}
     * @param version     SRU version handed to the {@link SRUClient}
     * @return the raw search response, or an empty string if the catalogue URL is malformed
     */
    public static String search(String catalogue, String schema, String searchField, String searchValue, String packing,
            String version) {
        try {
            SRUClient client = new SRUClient(catalogue, schema, packing, version);
            return client.getSearchResponse(searchField + "=" + searchValue);
        } catch (MalformedURLException e) {
            // Do not swallow the problem silently: callers only see an empty response otherwise.
            Helper.setFehlerMeldung("Invalid SRU catalogue URL '" + catalogue + "': " + e.getMessage());
        }
        return "";
    }

    /**
     * Converts an SRU response into a DOM {@code <collection>}. Besides the record itself this method
     * <ul>
     * <li>marks 954 subfields b/d that do not belong to the selected exemplar with the code {@code invalid},</li>
     * <li>follows 776$w to a second record and adds its identifiers as {@code ppnDigital}/{@code epnDigital} fields,</li>
     * <li>fetches the anchor record (773/800/810/830 $w) and adds it to the collection in front of the record.</li>
     * </ul>
     *
     * @param opac         plugin that receives the hit count and the generated ATSTSL
     * @param catalogue    catalogue address used for follow-up searches
     * @param schema       record schema used for follow-up searches
     * @param searchField  index the original search was run on; {@code pica.epn} enables exemplar filtering
     * @param searchValue  value the original search was run with
     * @param resultString raw SRU response of the original search
     * @param packing      record packing used for follow-up searches
     * @param version      SRU version used for follow-up searches
     * @param ignoreAnchor if true, 773$w is blanked instead of being used as anchor identifier
     * @return the {@code <collection>} element, or {@code null} if the response contains no record
     * @throws IOException                  if reading a response fails
     * @throws JDOMException                if a response is not well-formed XML
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    public static Node parseHaabResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField,
            String searchValue, String resultString, String packing, String version, boolean ignoreAnchor)
            throws IOException, JDOMException, ParserConfigurationException {
        Element record = getRecordWithoutSruHeader(parseXml(resultString));
        if (record == null) {
            opac.setHitcount(0);
            return null;
        }
        opac.setHitcount(1);

        boolean isPeriodical = false;
        boolean isManuscript = false;
        boolean isCartographic = false;
        boolean isMultiVolume = false;
        boolean isFSet = false;

        String anchorPpn = null;
        String otherAnchorPpn = null;
        String otherAnchorEpn = null;
        String otherPpn = null;
        String currentEpn = null;
        String otherEpn = null;
        boolean foundMultipleEpns = false;
        boolean shelfmarkFound = false;

        // generate an answer document
        org.w3c.dom.Document answer = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        org.w3c.dom.Element collection = answer.createElement("collection");
        answer.appendChild(collection);

        List<Element> data = record.getChildren();
        for (Element el : data) {
            if (el.getName().equalsIgnoreCase("leader")) {
                String leader = normalizeLeader(el.getText());
                char c6 = leaderChar(leader, 6);
                char c7 = leaderChar(leader, 7);
                char c19 = leaderChar(leader, 19);
                if (c6 == 'a' && (c7 == 's' || c7 == 'd')) {
                    isPeriodical = true;
                } else if (c6 == 't') {
                    isManuscript = true;
                } else if (c6 == 'e') {
                    isCartographic = true;
                }
                if (c19 == 'b' || c19 == 'c') {
                    isFSet = true;
                }
            }

            if (el.getName().equalsIgnoreCase("datafield")) {
                String tag = el.getAttributeValue("tag");
                List<Element> subfields = el.getChildren();
                boolean isCurrentEpn = false;
                for (Element sub : subfields) {
                    String code = sub.getAttributeValue("code");
                    // anchor identifier
                    if ("773".equals(tag) && "w".equals(code)) {
                        if (ignoreAnchor) {
                            sub.setText("");
                        } else if (isFSet || isPeriodical) {
                            isMultiVolume = true;
                            anchorPpn = cleanIdentifier(sub.getText());
                        }
                    } else if ("800".equals(tag) && "w".equals(code)) {
                        isMultiVolume = true;
                        anchorPpn = cleanIdentifier(sub.getText());
                    } else if (isManuscript && "810".equals(tag) && "w".equals(code)) {
                        isMultiVolume = true;
                        anchorPpn = cleanIdentifier(sub.getText());
                    } else if ("830".equals(tag) && "w".equals(code)) {
                        if (isCartographic || (isFSet && anchorPpn == null)) {
                            isMultiVolume = true;
                            anchorPpn = cleanIdentifier(sub.getText());
                        }
                    } else if ("776".equals(tag) && "w".equals(code)) {
                        if (otherPpn == null) {
                            // found first/only occurrence
                            otherPpn = cleanIdentifier(sub.getText());
                        } else {
                            // a second occurrence makes the reference ambiguous
                            otherPpn = null;
                            foundMultipleEpns = true;
                        }
                    } else if ("954".equals(tag)) {
                        if ("b".equals(code)) {
                            if ("pica.epn".equals(searchField)) {
                                // remove wrong epns
                                currentEpn = cleanIdentifier(sub.getText());
                                isCurrentEpn = true;
                                if (!searchValue.trim().equals(currentEpn)) {
                                    sub.setAttribute("code", "invalid");
                                    for (Element exemplarData : subfields) {
                                        if ("d".equals(exemplarData.getAttributeValue("code"))) {
                                            exemplarData.setAttribute("code", "invalid");
                                        }
                                    }
                                }
                            } else if (currentEpn == null) {
                                isCurrentEpn = true;
                                currentEpn = cleanIdentifier(sub.getText());
                            } else {
                                foundMultipleEpns = true;
                            }
                        } else if ("d".equals(code)) {
                            // only the first shelfmark of the current exemplar is kept
                            if (!shelfmarkFound && isCurrentEpn) {
                                shelfmarkFound = true;
                            } else {
                                sub.setAttribute("code", "invalid");
                            }
                        }
                    }
                }
            }
        }

        // search for pica.zdb for periodicals
        // get digital epn from digital ppn record
        if (otherPpn != null) {
            String otherResult =
                    SRUHelper.search(catalogue, schema, isPeriodical ? "pica.zdb" : "pica.ppn", otherPpn, packing, version);
            Element otherRecord = getRecordWithoutSruHeader(parseXml(otherResult));
            if (otherRecord == null) {
                Helper.setFehlerMeldung("import_OtherEPNNotFound");
            } else {
                for (Element field : otherRecord.getChildren("controlfield", MARC)) {
                    if ("001".equals(field.getAttributeValue("tag"))) {
                        otherPpn = field.getText();
                    }
                }
                for (Element field : otherRecord.getChildren("datafield", MARC)) {
                    String tag = field.getAttributeValue("tag");
                    for (Element sub : field.getChildren()) {
                        String code = sub.getAttributeValue("code");
                        // anchor identifier
                        if ("773".equals(tag) && "w".equals(code)) {
                            otherAnchorPpn = cleanIdentifier(sub.getText());
                        } else if ("800".equals(tag) && "w".equals(code)) {
                            otherAnchorPpn = cleanIdentifier(sub.getText());
                        } else if (isManuscript && "810".equals(tag) && "w".equals(code)) {
                            otherAnchorPpn = cleanIdentifier(sub.getText());
                        } else if (isCartographic && "830".equals(tag) && "w".equals(code)) {
                            otherAnchorPpn = cleanIdentifier(sub.getText());
                        } else if ("954".equals(tag) && "b".equals(code)) {
                            if (otherEpn == null) {
                                otherEpn = cleanIdentifier(sub.getText());
                            } else {
                                foundMultipleEpns = true;
                                otherEpn = null;
                            }
                        }
                    }
                }
            }
            // otherPpn cannot be null here: it is only ever overwritten with element text
            data.add(createIdentifierField("ppnDigital", otherPpn));
            if (otherEpn != null && !foundMultipleEpns) {
                data.add(createIdentifierField("epnDigital", otherEpn));
            }
        }

        org.w3c.dom.Element marcRecord = getRecord(answer, data, opac);

        if (isMultiVolume) {
            // get anchor record
            String anchorResult = SRUHelper.search(catalogue, schema, "pica.ppn", anchorPpn, packing, version);
            Element anchorRecord = getRecordWithoutSruHeader(parseXml(anchorResult));
            if (anchorRecord != null) {
                List<Element> anchorData = anchorRecord.getChildren();

                // get EPN/PPN digital for anchor
                // NOTE(review): otherAnchorPpn may still be null here, the search is then run with the text "null"
                String otherAnchorResult = SRUHelper.search(catalogue, schema, isPeriodical ? "pica.zdb" : "pica.ppn",
                        otherAnchorPpn, packing, version);
                Element otherAnchorRecord = getRecordWithoutSruHeader(parseXml(otherAnchorResult));
                if (otherAnchorRecord == null) {
                    Helper.setFehlerMeldung("import_OtherEPNNotFound");
                } else {
                    for (Element field : otherAnchorRecord.getChildren("controlfield", MARC)) {
                        if ("001".equals(field.getAttributeValue("tag"))) {
                            otherAnchorPpn = field.getText();
                        }
                    }
                    for (Element field : otherAnchorRecord.getChildren("datafield", MARC)) {
                        if ("954".equals(field.getAttributeValue("tag"))) {
                            for (Element sub : field.getChildren()) {
                                if ("b".equals(sub.getAttributeValue("code"))) {
                                    if (otherAnchorEpn == null) {
                                        otherAnchorEpn = cleanIdentifier(sub.getText());
                                    } else {
                                        foundMultipleEpns = true;
                                    }
                                }
                            }
                        }
                    }
                    if (otherAnchorPpn != null) {
                        anchorData.add(createIdentifierField("ppnDigital", otherAnchorPpn));
                    }
                    if (otherAnchorEpn != null && !foundMultipleEpns) {
                        anchorData.add(createIdentifierField("epnDigital", otherAnchorEpn));
                    }
                }
                org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac);
                collection.appendChild(anchorMarcRecord);
            }
        }

        if (foundMultipleEpns) {
            Helper.setFehlerMeldung("import_foundMultipleEPNs");
        }
        // the record itself always comes after its anchor
        collection.appendChild(marcRecord);
        return answer.getDocumentElement();
    }

    /**
     * Converts an SRU response into a DOM {@code <collection>} and, for multi-volume records, fetches the
     * anchor record referenced in 773/800/810/830 $w and adds it in front of the record.
     *
     * @param opac         plugin that receives the hit count and the generated ATSTSL
     * @param catalogue    catalogue address used for the anchor search
     * @param schema       record schema used for the anchor search
     * @param searchField  index the anchor identifier is searched in
     * @param resultString raw SRU response of the original search
     * @param packing      record packing used for the anchor search
     * @param version      SRU version used for the anchor search
     * @return the {@code <collection>} element, or {@code null} if the response contains no record
     * @throws IOException                  if reading a response fails
     * @throws JDOMException                if a response is not well-formed XML
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    public static Node parseGbvResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField,
            String resultString, String packing, String version)
            throws IOException, JDOMException, ParserConfigurationException {
        // srw:searchRetrieveResponse
        Element record = getRecordWithoutSruHeader(parseXml(resultString));
        if (record == null) {
            opac.setHitcount(0);
            return null;
        }
        opac.setHitcount(1);

        // generate an answer document
        org.w3c.dom.Document answer = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        org.w3c.dom.Element collection = answer.createElement("collection");
        answer.appendChild(collection);

        boolean isMultiVolume = false;
        boolean isPeriodical = false;
        boolean isManuscript = false;
        boolean isCartographic = false;
        String anchorIdentifier = "";

        List<Element> data = record.getChildren();
        for (Element el : data) {
            if (el.getName().equalsIgnoreCase("leader")) {
                String leader = normalizeLeader(el.getText());
                char c6 = leaderChar(leader, 6);
                char c7 = leaderChar(leader, 7);
                char c19 = leaderChar(leader, 19);
                if (c6 == 'a' && (c7 == 's' || c7 == 'd')) {
                    isPeriodical = true;
                } else if (c6 == 't') {
                    isManuscript = true;
                } else if (c6 == 'e') {
                    isCartographic = true;
                }
                if (c19 == 'b' || c19 == 'c') {
                    isMultiVolume = true;
                }
            }

            if (el.getName().equalsIgnoreCase("datafield")) {
                String tag = el.getAttributeValue("tag");
                for (Element sub : el.getChildren()) {
                    String code = sub.getAttributeValue("code");
                    // anchor identifier
                    if ("773".equals(tag) && "w".equals(code)) {
                        if (!isMultiVolume && !isPeriodical) {
                            sub.setText("");
                        } else {
                            anchorIdentifier = cleanIdentifier(sub.getText());
                        }
                    } else if ("800".equals(tag) && "w".equals(code) && isMultiVolume) {
                        anchorIdentifier = cleanIdentifier(sub.getText());
                    } else if (isManuscript && "810".equals(tag) && "w".equals(code)) {
                        isMultiVolume = true;
                        anchorIdentifier = cleanIdentifier(sub.getText());
                    } else if ("830".equals(tag) && "w".equals(code)) {
                        // anchorIdentifier starts out empty (never null), so "not yet found" means isEmpty()
                        if (isCartographic || (isMultiVolume && anchorIdentifier.isEmpty())) {
                            anchorIdentifier = cleanIdentifier(sub.getText());
                        }
                    }
                }
            }
        }

        org.w3c.dom.Element marcRecord = getRecord(answer, data, opac);

        if (isMultiVolume) {
            String anchorResult = SRUHelper.search(catalogue, schema, searchField, anchorIdentifier, packing, version);
            Element anchorRecord = getRecordWithoutSruHeader(parseXml(anchorResult));
            if (anchorRecord != null) {
                org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorRecord.getChildren(), opac);
                collection.appendChild(anchorMarcRecord);
            }
        }

        // the record itself always comes after its anchor
        collection.appendChild(marcRecord);
        return answer.getDocumentElement();
    }

    /**
     * Extracts the first MARC record from an SRU search response
     * ({@code srw:records / srw:record / srw:recordData / marc:record}).
     *
     * @param document parsed SRU response
     * @return the first MARC record, or {@code null} if any element on the path is missing
     */
    public static Element getRecordWithoutSruHeader(Document document) {
        Element root = document.getRootElement();
        // <srw:records>
        Element srwRecords = root.getChild("records", SRW);
        if (srwRecords == null) {
            return null;
        }
        // <srw:record>
        List<Element> srwRecordList = srwRecords.getChildren("record", SRW);
        if (srwRecordList == null || srwRecordList.isEmpty()) {
            return null;
        }
        // <srw:recordData>
        Element recordData = srwRecordList.get(0).getChild("recordData", SRW);
        if (recordData == null) {
            return null;
        }
        return recordData.getChild("record", MARC);
    }

    /**
     * Reads a MARC DOM node into a digital document and wraps it in an {@link XStream} file format whose
     * physical structure is a newly created {@code BoundBook}.
     *
     * @param marc  DOM node containing the MARC data
     * @param prefs rule set used for reading and for looking up the {@code BoundBook} type
     * @param epn   not used by this method
     * @return the file format holding the digital document
     * @throws ReadException                    if the MARC data cannot be read
     * @throws PreferencesException             if the rule set is invalid
     * @throws TypeNotAllowedForParentException if the physical structure cannot be created
     */
    public static Fileformat parseMarcFormat(Node marc, Prefs prefs, String epn)
            throws ReadException, PreferencesException, TypeNotAllowedForParentException {
        MarcFileformat pp = new MarcFileformat(prefs);
        pp.read(marc);
        DigitalDocument dd = pp.getDigitalDocument();
        Fileformat ff = new XStream(prefs);
        ff.setDigitalDocument(dd);
        // add BoundBook
        DocStructType dst = prefs.getDocStrctTypeByName("BoundBook");
        DocStruct dsBoundBook = dd.createDocStruct(dst);
        dd.setPhysicalDocStruct(dsBoundBook);
        return ff;
    }

    /**
     * Copies the given JDOM fields into a new DOM {@code <record>}. Leader and control fields are additionally
     * mirrored as data fields with subfield {@code a}; 245$a gets an extra subfield {@code x} holding the
     * sorting title. The ATSTSL built from 245$a (sorting form) and 100$a is set on the plugin.
     *
     * @param answer document used to create the DOM elements
     * @param data   leader, control fields and data fields of one record
     * @param plugin plugin that creates and receives the ATSTSL
     * @return the new, not yet attached {@code <record>} element
     */
    private static org.w3c.dom.Element getRecord(org.w3c.dom.Document answer, List<Element> data, IOpacPlugin plugin) {
        org.w3c.dom.Element marcRecord = answer.createElement("record");
        // fix for wrong leader in SWB
        org.w3c.dom.Element leader = null;
        String author = "";
        String title = "";

        for (Element datafield : data) {
            String name = datafield.getName();
            if (name.equals("leader") && leader == null) {
                leader = answer.createElement("leader");
                marcRecord.appendChild(leader);
                leader.appendChild(answer.createTextNode(normalizeLeader(datafield.getText())));

                // get the leader field as a datafield (with the original, unpadded text)
                appendAsDatafield(answer, marcRecord, "leader", datafield.getText());
            } else if (name.equals("controlfield")) {
                org.w3c.dom.Element field = answer.createElement("controlfield");
                field.appendChild(answer.createTextNode(datafield.getText()));
                String tag = datafield.getAttributeValue("tag");
                field.setAttribute("tag", tag);
                marcRecord.appendChild(field);

                // get the controlfields as datafields
                appendAsDatafield(answer, marcRecord, tag, datafield.getText());
            } else if (name.equals("datafield")) {
                String tag = datafield.getAttributeValue("tag");
                String ind1 = datafield.getAttributeValue("ind1");
                String ind2 = datafield.getAttributeValue("ind2");
                org.w3c.dom.Element field = answer.createElement("datafield");
                marcRecord.appendChild(field);
                field.setAttribute("tag", tag);
                field.setAttribute("ind1", ind1);
                field.setAttribute("ind2", ind2);

                for (Element sub : datafield.getChildren()) {
                    org.w3c.dom.Element subfield = answer.createElement("subfield");
                    field.appendChild(subfield);
                    String code = sub.getAttributeValue("code");
                    subfield.setAttribute("code", code);
                    subfield.appendChild(answer.createTextNode(sub.getText()));

                    if ("100".equals(tag) && "a".equals(code)) {
                        author = sub.getText();
                    }
                    // main title, create sorting title
                    if ("245".equals(tag) && "a".equals(code)) {
                        org.w3c.dom.Element sorting = answer.createElement("subfield");
                        field.appendChild(sorting);
                        sorting.setAttribute("code", "x");
                        String subtext = stripNonFilingCharacters(sub.getText(), ind2);
                        title = subtext;
                        sorting.appendChild(answer.createTextNode(subtext));
                    }
                }
            }
        }
        plugin.setAtstsl(plugin.createAtstsl(title, author));
        return marcRecord;
    }

    /** Parses an SRU response without validation and without loading external DTDs. */
    private static Document parseXml(String xml) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING);
        builder.setFeature("http://xml.org/sax/features/validation", false);
        builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // NOTE(review): the second argument of build(Reader, String) is the system id, not an encoding
        return builder.build(new StringReader(xml), "utf-8");
    }

    /** Removes a parenthesised prefix such as {@code (DE-627)} and the text {@code KXP} from an identifier. */
    private static String cleanIdentifier(String rawIdentifier) {
        return rawIdentifier.replaceAll("\\(.+\\)", "").replace("KXP", "");
    }

    /** Prefixes leaders shorter than 24 characters with {@code 00000}; longer leaders are returned unchanged. */
    private static String normalizeLeader(String leader) {
        return leader.length() < 24 ? "00000" + leader : leader;
    }

    /** Returns the leader character at the given position, or a blank if the leader is too short. */
    private static char leaderChar(String leader, int position) {
        return position < leader.length() ? leader.charAt(position) : ' ';
    }

    /** Creates a MARC data field with empty indicators and a single subfield {@code a} holding the value. */
    private static Element createIdentifierField(String tag, String value) {
        Element datafield = new Element("datafield", MARC);
        datafield.setAttribute("tag", tag);
        datafield.setAttribute("ind1", "");
        datafield.setAttribute("ind2", "");
        Element subfield = new Element("subfield", MARC);
        subfield.setAttribute("code", "a");
        subfield.setText(value);
        datafield.addContent(subfield);
        return datafield;
    }

    /** Appends a data field with blank indicators and a single subfield {@code a} to the given DOM record. */
    private static void appendAsDatafield(org.w3c.dom.Document answer, org.w3c.dom.Element marcRecord, String tag,
            String value) {
        org.w3c.dom.Element mirrored = answer.createElement("datafield");
        mirrored.setAttribute("tag", tag);
        mirrored.setAttribute("ind1", " ");
        mirrored.setAttribute("ind2", " ");
        org.w3c.dom.Element subfield = answer.createElement("subfield");
        mirrored.appendChild(subfield);
        subfield.setAttribute("code", "a");
        Text text = answer.createTextNode(value);
        subfield.appendChild(text);
        marcRecord.appendChild(mirrored);
    }

    /**
     * Drops as many leading characters from the title as the second indicator states. A missing, blank,
     * non-numeric, negative or too large indicator leaves the title unchanged instead of raising an exception.
     */
    private static String stripNonFilingCharacters(String text, String ind2) {
        if (ind2 == null || ind2.trim().isEmpty()) {
            return text;
        }
        int skip;
        try {
            skip = Integer.parseInt(ind2.trim());
        } catch (NumberFormatException e) {
            // indicator is not a number, so there is nothing to strip
            return text;
        }
        if (skip <= 0 || skip > text.length()) {
            return text;
        }
        return text.substring(skip);
    }
}