Java tutorial
/** * Copyright 2008 The University of North Carolina at Chapel Hill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.unc.lib.dl.ingest.aip; import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE; import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE; import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE; import java.io.File; import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.net.URI; import java.net.URISyntaxException; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.Namespace; import org.jdom.output.XMLOutputter; import org.jdom.xpath.XPath; import org.jrdf.JRDFFactory; import org.jrdf.SortedMemoryJRDFFactoryImpl; import org.jrdf.graph.BlankNode; import org.jrdf.graph.Graph; import org.jrdf.graph.GraphElementFactoryException; import org.jrdf.graph.GraphException; import org.jrdf.graph.Literal; import org.jrdf.graph.ObjectNode; import org.jrdf.graph.Triple; import org.jrdf.graph.TripleFactoryException; import org.jrdf.graph.URIReference; import org.jrdf.parser.ParseException; import org.jrdf.parser.Parser; import org.jrdf.parser.StatementHandlerException; import org.jrdf.parser.rdfxml.GraphRdfXmlParser; import org.jrdf.util.ClosableIterator; import org.jrdf.writer.BlankNodeRegistry; import org.jrdf.writer.RdfNamespaceMap; import org.jrdf.writer.mem.BlankNodeRegistryImpl; import org.jrdf.writer.mem.RdfNamespaceMapImpl; import edu.unc.lib.dl.fedora.PID; import edu.unc.lib.dl.ingest.IngestException; import edu.unc.lib.dl.util.ContainerPlacement; import edu.unc.lib.dl.util.ContentModelHelper; import edu.unc.lib.dl.util.JRDFGraphUtil; import edu.unc.lib.dl.util.PremisEventLogger; import edu.unc.lib.dl.xml.FOXMLJDOMUtil; import edu.unc.lib.dl.xml.JDOMNamespaceUtil; /** * Extends the IngestContext behavior (in Decorator fashion) to allow query and manipulation of RELS-EXT RDF through an * in-memory graph. The in-memory graph is an efficient way of expressing object properties and relationships and can be * used to perform business logic and add properties without keep the RDF XML. * * @author count0 * */ public class RDFAwareAIPImpl implements ArchivalInformationPackage { private static final JRDFFactory JRDF_FACTORY = SortedMemoryJRDFFactoryImpl.getFactory(); public static RDFAwareAIPImpl getInstance(ArchivalInformationPackage aip) throws AIPException { RDFAwareAIPImpl result; if (aip instanceof RDFAwareAIPImpl) { result = (RDFAwareAIPImpl) aip; } else { result = new RDFAwareAIPImpl(aip); } return result; } private static final Log log = LogFactory.getLog(RDFAwareAIPImpl.class); private final ArchivalInformationPackage baseAIP; private Graph graph; /** * RDFIngestContext can be used as a decorator to add RDF graph capabilities in runtime. * * @param baseAIP * an ingest context */ public RDFAwareAIPImpl(ArchivalInformationPackage baseAIP) throws AIPException { this.baseAIP = baseAIP; this.graph = JRDF_FACTORY.getNewGraph(); addRELSEXT2Graph(); addFedoraLabel2Graph(); if (log.isInfoEnabled()) printGraph(); } /** * */ private void addFedoraLabel2Graph() { for (PID pid : this.baseAIP.getPIDs()) { Document foxml = this.baseAIP.getFOXMLDocument(pid); String label = FOXMLJDOMUtil.getLabel(foxml); try { if (label != null) { JRDFGraphUtil.addTriple(graph, new URI(pid.getURI()), ContentModelHelper.FedoraProperty.label.getURI(), label); } } catch (URISyntaxException e) { throw new Error("unexpected exception forming uri for string:" + pid.getURI(), e); } } } public void addRELSEXT2Graph() throws AIPException { for (PID pid : this.baseAIP.getPIDs()) { Document doc = this.baseAIP.getFOXMLDocument(pid); String str = getRELSEXT(doc); if (str == null) { continue; } StringReader r = new StringReader(str); try { Parser parser = new GraphRdfXmlParser(this.graph); parser.parse(r, "http://example.com/"); } catch (GraphException e) { log.error(e); } catch (IOException e) { log.error(e); } catch (ParseException e) { log.error(e); } catch (StatementHandlerException e) { log.error(e); } finally { r.close(); } } } /** * Call this method when you are done making modifications to the RDF Graph that may be needed by ingest filters that * are not RDF-aware. This method persists the RDF Graph in individual FOXML RELS-EXT datastreams. */ public void commitGraphChanges() throws IngestException { log.debug("Commiting graph changes to FOXML"); if (log.isDebugEnabled()) printGraph(); for (PID pid : this.baseAIP.getPIDs()) { Document doc = this.baseAIP.getFOXMLDocument(pid); saveFOXMLDocument(pid, doc); } } @Override public void delete() { this.baseAIP.delete(); } @Override public PremisEventLogger getEventLogger() { return this.baseAIP.getEventLogger(); } @Override public File getFileForUrl(String path) { return this.baseAIP.getFileForUrl(path); } @Override public Document getFOXMLDocument(PID pid) { return this.baseAIP.getFOXMLDocument(pid); } @Override public File getFOXMLFile(PID pid) { return this.baseAIP.getFOXMLFile(pid); } public Graph getGraph() { return this.graph; } @Override public Set<PID> getPIDs() { return this.baseAIP.getPIDs(); } private String getRELSEXT(Document foxml) { String result = null; // get the RELS-EXT element Namespace foxmlNS = Namespace.getNamespace("foxml", JDOMNamespaceUtil.FOXML_NS.getURI()); XPath relsNodeXPath; try { relsNodeXPath = XPath.newInstance( "/foxml:digitalObject/foxml:datastream[@ID='RELS-EXT']/foxml:datastreamVersion/foxml:xmlContent/rdf:RDF"); relsNodeXPath.addNamespace(foxmlNS); relsNodeXPath.addNamespace(JDOMNamespaceUtil.RDF_NS); Object o = relsNodeXPath.selectSingleNode(foxml); if (o == null) { log.debug("RELS-EXT not found"); return null; } Element el = (Element) o; StringWriter sw = new StringWriter(); XMLOutputter outputter = new XMLOutputter(); try { outputter.output(el, sw); } catch (IOException e) { throw new Error("Unexpected Error: Failed to write out element string: " + el); } result = sw.toString(); } catch (JDOMException e) { throw new Error(e); } return result; } @Override public File getTempFOXDir() { return this.baseAIP.getTempFOXDir(); } // @Override // public Integer getTopPIDContainerOrder(PID toppid) { // return this.baseAIP.getTopPIDContainerOrder(toppid); // } // // @Override // public String getTopPIDContainerPath(PID pid) { // return this.baseAIP.getTopPIDContainerPath(pid); // } @Override public Set<PID> getTopPIDs() { return this.baseAIP.getTopPIDs(); } /** * Get the uri part of the namespace. * * @param resourceURI * String URI * @return String namespace URI */ private String[] getURIParts(URIReference resource) { String[] result = new String[2]; String resourceURI = resource.getURI().toString(); int index1 = resourceURI.lastIndexOf('#'); int index2 = resourceURI.lastIndexOf('/'); int index = Math.max(index1, index2); result[0] = (index > 0 && index < resourceURI.length()) ? resourceURI.substring(0, ++index) : resourceURI; result[1] = (index > 0 && index < resourceURI.length()) ? resourceURI.substring(index, resourceURI.length()) : resourceURI; return result; } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.ReportingIngestBundle#prepareIngest() */ @Override public void prepareIngest() throws IngestException { log.debug("RDFIngestContext preparing for ingest"); commitGraphChanges(); this.baseAIP.prepareIngest(); } public void printGraph() { log.debug("Printing the RDF Graph in the RDFIngestContext"); ClosableIterator<Triple> iter = null; try { iter = this.graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); while (iter.hasNext()) { log.debug("Graph: " + iter.next()); } log.debug("Total number of statements: " + this.graph.getNumberOfTriples()); } catch (GraphException e) { log.error("problem with JRDF graph:", e); } finally { if (iter != null) iter.close(); } } @Override public void saveFOXMLDocument(PID pid, Document doc) { RdfNamespaceMap nsmap = new RdfNamespaceMapImpl(); BlankNodeRegistry blankReg = new BlankNodeRegistryImpl(); try { nsmap.load(graph); } catch (GraphException e) { throw new Error("Programmer error", e); } Element rdfElement = new Element("RDF", JDOMNamespaceUtil.RDF_NS); HashMap<String, Namespace> namespacesMap = new HashMap<String, Namespace>(); for (Map.Entry<String, String> name : nsmap.getNameEntries()) { String uri = name.getValue(); Namespace ns = Namespace.getNamespace(name.getKey(), uri); namespacesMap.put(uri, ns); rdfElement.addNamespaceDeclaration(ns); } String subjectURI = "info:fedora/" + pid.getPid(); Element rdfDescription = new Element("Description", JDOMNamespaceUtil.RDF_NS); rdfDescription.setAttribute("about", subjectURI); rdfElement.addContent(rdfDescription); // get the subject URIReference subject = null; try { subject = this.graph.getElementFactory().createResource(new URI(subjectURI)); } catch (GraphElementFactoryException e) { throw new Error(e); } catch (URISyntaxException e) { throw new Error(e); } // look for triples about this pid ClosableIterator<Triple> tripleIter = null; try { Triple aboutSubject = graph.getTripleFactory().createTriple(subject, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); tripleIter = graph.find(aboutSubject); while (tripleIter.hasNext()) { Triple t = tripleIter.next(); if (ContentModelHelper.FedoraProperty.label.getURI().toString() .equals(t.getPredicate().toString())) { continue; } Element statement = writeSubjectAssumedTriple(t, namespacesMap, blankReg); rdfDescription.addContent(statement); // log.debug(statement); } // end of triple loop FOXMLJDOMUtil.setInlineXMLDatastreamContent(doc, "RELS-EXT", "Relationship Metadata", rdfElement, false); } catch (GraphException e) { throw new Error(e); } catch (TripleFactoryException e) { throw new Error(e); } finally { if (tripleIter != null) tripleIter.close(); } // rewrite the FOXML to the file this.baseAIP.saveFOXMLDocument(pid, doc); } // @Override // public void setTopPIDLocation(String containerPath, PID topPID, Integer // order) { // this.baseAIP.setTopPIDLocation(containerPath, topPID, order); // } public void setTopPIDs(Set<PID> topPIDs) { this.baseAIP.setTopPIDs(topPIDs); } /** * Writes XML element(s) to represent an RDF triple where the subject is already implied by the surrounding XML. * * @param t * the triple to write * @param namespacesMap * a map of namespaces in use * @param blankReg * the blank node registry in use * @return */ private Element writeSubjectAssumedTriple(Triple t, HashMap<String, Namespace> namespacesMap, BlankNodeRegistry blankReg) { URIReference pred = (URIReference) t.getPredicate(); String[] parts = getURIParts(pred); Namespace ns = namespacesMap.get(parts[0]); Element statement = new Element(parts[1], ns); ObjectNode object = t.getObject(); if (object instanceof Literal) { Literal litObject = (Literal) object; statement.setText(litObject.getEscapedLexicalForm()); if (litObject.getDatatypeURI() != null) { String type = litObject.getDatatypeURI().toString(); statement.setAttribute("datatype", type); } if (litObject.getLanguage() != null) { // not yet implemented // Element e = new Element("lang", Namespace.XML_NAMESPACE); // e.setText(litObject.getLanguage()); // statement.addContent(e); } } else if (object instanceof URIReference) { URIReference uriObject = (URIReference) object; statement.setAttribute("resource", uriObject.getURI().toString()); } else if (object instanceof BlankNode) { // its a blank node, some // recursion! BlankNode node = (BlankNode) object; ClosableIterator<Triple> blankTripleIter = null; try { Triple aboutBlankNode = graph.getTripleFactory().createTriple(node, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); blankTripleIter = graph.find(aboutBlankNode); while (blankTripleIter.hasNext()) { Triple blankTrip = blankTripleIter.next(); Element el = writeSubjectAssumedTriple(blankTrip, namespacesMap, blankReg); statement.addContent(el); } } catch (GraphException e) { throw new Error(e); } catch (TripleFactoryException e) { throw new Error(e); } finally { if (blankTripleIter != null) blankTripleIter.close(); } } return statement; } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#getEmailRecipients() */ @Override public List<URI> getEmailRecipients() { return this.baseAIP.getEmailRecipients(); } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#getSendEmail() */ @Override public boolean getSendEmail() { return this.baseAIP.getSendEmail(); } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#setEmailRecipients (java.util.List) */ @Override public void setEmailRecipients(List<URI> recipients) { this.baseAIP.setEmailRecipients(recipients); } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#setSendEmail(boolean ) */ @Override public void setSendEmail(boolean sendEmail) { this.baseAIP.setSendEmail(sendEmail); } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#getTopPIDPlacement (edu.unc.lib.dl.fedora.PID) */ @Override public ContainerPlacement getContainerPlacement(PID pid) { return this.baseAIP.getContainerPlacement(pid); } /* * (non-Javadoc) * * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#setTopPIDPlacement (java.lang.String, * edu.unc.lib.dl.fedora.PID, java.lang.Integer, java.lang.Integer) */ @Override public void setContainerPlacement(PID parentPID, PID topPID, Integer designatedOrder, Integer sipOrder, String label) { this.baseAIP.setContainerPlacement(parentPID, topPID, designatedOrder, sipOrder, label); } @Override protected void finalize() throws Throwable { super.finalize(); // cleanup graph, etc. if (this.graph != null) { this.graph.close(); this.graph = null; } } /* (non-Javadoc) * @see edu.unc.lib.dl.ingest.aip.ArchivalInformationPackage#getDepositID() */ @Override public DepositRecord getDepositRecord() { return this.baseAIP.getDepositRecord(); } @Override public void setSubmitterGroups(String submitterGroups) { this.baseAIP.setSubmitterGroups(submitterGroups); } @Override public String getSubmitterGroups() { return this.baseAIP.getSubmitterGroups(); } }