Java tutorial
/* (c) Copyright 2006 Hewlett-Packard Development Company, LP [See end of file] $Id: GRDDL.java 2237 2007-09-24 10:04:04Z jeremy_carroll $ */ package com.hp.hpl.jena.grddl.impl; import java.io.IOException; import java.io.InterruptedIOException; import java.io.PipedReader; import java.io.PipedWriter; import java.io.StringReader; import java.io.StringWriter; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.sax.SAXResult; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import net.sf.saxon.Controller; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; import com.hp.hpl.jena.grddl.GRDDLReader; import com.hp.hpl.jena.grddl.GRDDLSecurityException; import com.hp.hpl.jena.grddl.license.License; import com.hp.hpl.jena.rdf.arp.JenaReader; import com.hp.hpl.jena.rdf.arp.SAX2Model; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.util.cache.Cache; import com.hp.hpl.jena.util.cache.CacheManager; import com.hp.hpl.jena.vocabulary.RDF; /** * ReaderImpl * * @author Jeremy J. Carroll */ public class GRDDL { static { License.check(); } final List<String> transforms = new ArrayList<String>(); final List<String> alternates = new ArrayList<String>(); public static final String NAMESPACE = "http://www.w3.org/2003/g/data-view#"; public static final String PROFILE = "http://www.w3.org/2003/g/data-view"; public static final String XHTMLNS[] = { "http://www.w3.org/1999/xhtml", "http://www.w3.org/2002/06/xhtml2/", "" }; private static final Property ProfileTransformation = ResourceFactory.createProperty(NAMESPACE, "profileTransformation"); private static final Property NamespaceTransformation = ResourceFactory.createProperty(NAMESPACE, "namespaceTransformation"); final Model model; final GRDDLReader reader; Rewindable input; public GRDDL(GRDDLReader r, Model m, String url) throws IOException { this(r, m); RewindableURL rewindableURL = new RewindableURL(url, this); input = rewindableURL; // if (!rewindableURL.conn.getURL().toString().equals(url)) // System.err.println("URL changed: "+rewindableURL.conn.getURL() + " was "+url); if (((GRDDLReaderBase) r).headers.containsKey("negotiate")) { tcn(rewindableURL); } } private void tcn(RewindableURL rewindableURL) { String alternates = rewindableURL.conn.getHeaderField("alternates"); if (alternates == null) return; String current = rewindableURL.conn.getHeaderField("content-location"); if (current == null) current = input.retrievalIRI(); else current = input.resolveAgainstRetrievalIRI(current); parseAlternates(alternates, current); } private void parseAlternates(String alternates, String done) { Iterator<String> it = tokenize(alternates).iterator(); int state = 0; while (it.hasNext()) { String n = it.next(); if (n.equals(",")) { state = 0; } else if (state == 0 && n.equals("{")) { state = 1; } else if (state == 1 && n.startsWith("\"")) { String a = unescape(n.substring(1, n.length() - 1)); a = input.resolveAgainstRetrievalIRI(a); if (!a.equals(done)) { addAlternate(a); } state = 2; } else { state = 2; } } } static Pattern unescaper = Pattern.compile("\\\\(.)"); /** * Replace \. with . * @param string Does not end in unescaped \ * @return unescaped version */ public String unescape(String string) { return unescaper.matcher(string).replaceAll("$1"); } static Pattern tokenizer = Pattern.compile( // quoted-string "\"([^\\\"]|\\.)*\"" + "|" + // separators "[\\0133\\0135()<>@,;:\\/?={}]" + "|" + // tokens "[\\041-\\0176&&[^\\0133\\0135()<>@,;:\\\"/?={}]]+"); /** * Split input into tokens * ignoring whitespace * except the quoted-string production. * @param alternates * @return */ private List<String> tokenize(String alternates) { List<String> r = new ArrayList<String>(); // System.err.println("Tokenizing: "+alternates); Matcher m = tokenizer.matcher(alternates); while (m.find()) { r.add(m.group()); } return r; } public GRDDL(GRDDLReader r, Model m, Rewindable rw) { this(r, m); input = rw; } private GRDDL(GRDDLReader r, Model m) { model = m; reader = r; } /** * Apply GRDDL algorithm. * * @throws IOException * */ public synchronized void go() throws IOException { initialParse(); reapTransforms(); applyTransforms(); getAlternates(); } private void getAlternates() { Iterator<String> urls = alternates.iterator(); while (urls.hasNext()) reader.read(model, urls.next()); } private void addAlternate(String a) { alternates.add(a); } private TransformerFactory xformFactory() { return ((GRDDLReaderBase) reader).xformFactory; } private void initialParse() throws IOException { schemas = new HashSet<String>(); profiles = new HashSet<String>(); needTidy = false; GRDDLReaderBase readerB = ((GRDDLReaderBase) reader); input.useSaxOrTidy(readerB.saxParser); InitialContentHandler ic; ic = new InitialContentHandler(readerB.disabled); SAXResult r = new SAXResult(ic); try { Transformer idTransform = xformFactory().newTransformer(); try { idTransform.transform(input.startAfresh(true), r); } catch (SeenEnoughExpectedException e) { // parse finished normally } catch (TransformerException e) { if (!e.getCause().getClass().equals(SeenEnoughExpectedException.class)) { throw e; } } if (needTidy) { input.useSaxOrTidy(readerB.tidyParser); ic = new InitialContentHandler(readerB.disabled); r = new SAXResult(ic); idTransform.transform(input.startAfresh(true), r); } } catch (SeenEnoughExpectedException e) { } catch (TransformerException e) { if (!e.getCause().getClass().equals(SeenEnoughExpectedException.class)) { throw new JenaException(e); } // else parse finished normally } catch (RuntimeException rte) { throw rte; } catch (IOException ioe) { throw ioe; } catch (Exception nrte) { throw new JenaException(nrte); } finally { input.close(); } addTransforms(ic.html ? readerB.htmlXforms : readerB.xmlXforms); } private void addTransforms(Set<String> xforms) { if (xforms != null) { Iterator<String> it = xforms.iterator(); while (it.hasNext()) { addTransform(it.next()); } } } private void applyTransforms() { Iterator<String> it = transforms.iterator(); while (it.hasNext()) { String n = it.next(); transformWith(n, it.hasNext()); } } private boolean transformWith(String string, boolean needRewind) { try { try { final Transformer t = transformerFor(string); String mimetype = mimetype(t); final Result result = resultFor(mimetype); if (result == null) return false; final Source in = input.startAfresh(needRewind); runInSandbox(new TERunnable() { public void run() throws TransformerException { t.transform(in, result); } }, true); postProcess(mimetype, result); return true; } catch (TransformerException e) { error(e); return false; } catch (SAXParseException e) { error(e); return false; } catch (InterruptedException e) { throw new InterruptedIOException("In GRDDL transformWith"); } finally { input.close(); if (subThread != null) subThread.interrupt(); } } catch (IOException ioe) { error(ioe); return false; } } private void postProcess(String mimetype, Result result) throws IOException, InterruptedException { if (mimetype.equalsIgnoreCase("text/rdf+n3")) endN3(result); if (mimetype.equalsIgnoreCase("text/html")) endGrddlResult(result); if (mimetype.equalsIgnoreCase("application/xhtml+xml")) endGrddlResult(result); } private void endGrddlResult(Result result) { // TODO security issues here StreamResult sr = (StreamResult) result; StringWriter sw = (StringWriter) sr.getWriter(); String html = sw.toString(); System.err.println(html); reader.read(model, new StringReader(html), input.retrievalIRI()); } private void endN3(final Result result) throws IOException, InterruptedException { ((StreamResult) result).getWriter().close(); if (subThread != null) subThread.join(); subThread = null; } private void runInSandbox(final TERunnable r, boolean protect) throws TransformerException { final TransformerException te[] = new TransformerException[] { null }; final RuntimeException re[] = new RuntimeException[] { null }; Runnable rr = new Runnable() { public void run() { try { r.run(); } catch (TransformerException e) { te[0] = e; } catch (RuntimeException e) { re[0] = e; } } }; if (protect) { // Check that the user is aware of the risk, // before running the untrusted code. License.check(); SecManager.sandbox(rr); } else { rr.run(); } if (te[0] != null) throw te[0]; if (re[0] != null) throw re[0]; } private void fatalError(Exception e) { ((GRDDLReaderBase) reader).checkException(e); reader.eHandler().fatalError(e); } private void error(Exception e) { ((GRDDLReaderBase) reader).checkException(e); reader.eHandler().error(e); } private Result resultFor(String mimeType) throws SAXParseException, IOException { if (mimeType.equalsIgnoreCase("text/rdf+n3")) return n3result(); if (mimeType.equalsIgnoreCase("text/html")) return grddlResult(); if (mimeType.equalsIgnoreCase("application/xhtml+xml")) return grddlResult(); if (mimeType.equalsIgnoreCase("application/rdf+xml")) return rdfXmlResult(); if (mimeType.equalsIgnoreCase("application/xml")) return rdfXmlResult(); System.err.println("Unsupported mimetype: " + mimeType); return null; } private String mimetype(Transformer t) { String mt = t.getOutputProperty(OutputKeys.MEDIA_TYPE); return mt == null ? "application/rdf+xml" : mt; } private Result grddlResult() throws IOException { return new StreamResult(new StringWriter() ); } Thread subThread = null; private PipedWriter pipe; private boolean needTidy = false; private Set<String> profiles; private Result n3result() throws IOException { pipe = new PipedWriter(); final PipedReader pr = new PipedReader(pipe); Result rslt = new StreamResult(pipe); subThread = new Thread() { public void run() { ((GRDDLReaderBase) reader).n3.read(model, pr, input.retrievalIRI()); } }; subThread.start(); return rslt; } private Result rdfXmlResult() throws SAXParseException { // TODO check resolve here SAX2Model s2m = SAX2Model.create(input.resolve(""), model); s2m.setErrorHandler(reader.eHandler()); s2m.setOptionsWith(((JenaReader) ((GRDDLReaderBase) reader).rdfxml).getOptions()); SAXResult r = new SAXResult(s2m); r.setLexicalHandler(s2m); return r; } private Transformer transformerFor(final String url) throws TransformerException { if (url.equals("RDF/XML")) { return xformFactory().newTransformer(); } else { logurl(url); try { ((GRDDLReaderBase) reader).lastSecurityException = null; final Transformer rslt[] = { null }; // TODO network and source issues final Source src = xsltStreamSource(url); runInSandbox(new TERunnable() { public void run() throws TransformerException { rslt[0] = xformFactory().newTransformer(src); } }, true); SafeURIResolver safeURIResolver = new SafeURIResolver(); rslt[0].setURIResolver(safeURIResolver); ((Controller) rslt[0]).setUnparsedTextURIResolver(safeURIResolver); return rslt[0]; } // catch (AssertionError e) { // if (e.getMessage().startsWith("Failed to load system function: unparsed-text()")) // throw new GRDDLSecurityException("unparsed-text() not permitted in this implementation"); // throw e; // } catch (SecurityException e) { throw new GRDDLSecurityException(e); } catch (TransformerException e) { // if (e.toString().contains("result-document") // || e.toString().contains("disabled") // || e.toString().contains("extension") ) // throw new GRDDLSecurityException(e); if (((GRDDLReaderBase) reader).lastSecurityException != null) throw ((GRDDLReaderBase) reader).lastSecurityException; System.err.println("<" + url + "> A.Rethrowing " + e.getMessage()); throw e; } catch (RuntimeException e) { System.err.println("<" + url + "> B.Rethrowing " + e.toString() + ":" + e.getMessage()); throw e; } } } static Set<String> allUrls = new HashSet<String>(); static void logurl(String url) { // if (!url.startsWith("http://www.w3.org/2001/sw/grddl-wg/td/")) if (!allUrls.contains(url)) { allUrls.add(url); // System.err.println("Using url: "+url); } } private Source xsltStreamSource(String url) throws TransformerException { try { URL urlx = new URL(url); URLConnection conn = urlx.openConnection(); conn.setRequestProperty("accept", "application/xslt+xml; q=1.0, " + "text/xsl; q=0.8, " + "application/xsl; q=0.8, " + "application/xml; q=0.7, " + "text/xml; q=0.6, " + "application/xsl+xml; q=0.8, " + "*/*; q=0.1"); return new StreamSource(conn.getInputStream(), conn.getURL().toString()); } catch (IOException e) { throw new TransformerException(e); } } /* * * Read beginning of file ... as XML if first element is xhtml:html then a) * assume legal XHTML+XML read as XML looking for profile and/or grddl * transformation thing if profile found then need to go through whole doc * looking for rel's either transformation or profileTransformation and * apply b) if first element is html or case variants and not in xml * namespace then apply tidy (streaming) and go to a) c) other wise look for * grddl namespace stuff at top level and/or in namespace doc * * * */ private class InitialContentHandler extends DefaultHandler { private boolean disabled; InitialContentHandler(boolean dis) { disabled = dis; } boolean rootElement = true; boolean grddlNamespace = false; boolean grddlProfile = false; boolean html = false; boolean inHead = false; public void endElement(String uri, String localName, String qname) { trace("<element: " + qname); if (html && isHtmlNS(uri) && localName.equalsIgnoreCase("html")) { inHead = false; } } public void startElement(String uri, String localName, String qname, Attributes attr) throws SAXException { trace(">element: " + qname); if (rootElement) { html = isHtmlMimetype(); if (uri != null && !uri.equals("")) { html = html || isHtmlNS(uri); if (disabled) throw new SeenEnoughExpectedException(); checkSchema(input.resolve(uri)); } else if (localName.equalsIgnoreCase("html")) html = true; if (grddlNamespace) checkRootAttrs(attr); if (disabled || !html) throw new SeenEnoughExpectedException(); if (!localName.equalsIgnoreCase("html")) { needTidy(); // doesn't usually return } } // doing html if (attr.getValue("xml:base") != null) { // TODO error handling System.err.println(input.retrievalIRI() + ": xml:base should not be used within HTML - ignored"); } if (!grddlProfile) { if (localName.equalsIgnoreCase("head") && isHtmlNS(uri)) { checkProfileAttrs(attr); inHead = true; } if ((!rootElement) && (!grddlProfile)) { // The head must be second, or maybe first // in ill-formed HTML. // If we've not seen the profile by this // point, then we won't and we stop. // throw new SeenEnoughExpectedException(); } rootElement = false; if ((!inHead) && !grddlProfile) return; } if (localName.equalsIgnoreCase("base") && isHtmlNS(uri)) { String href = attr.getValue("href"); if (href != null) { // System.err.println("setting base to: "+href); input.setBase(href); } } if (!grddlProfile) return; if (localName.equalsIgnoreCase("a") || localName.equalsIgnoreCase("link")) { checkLinkAttrs(attr); } } private boolean isHtmlNS(String uri) { for (int i = 0; i < XHTMLNS.length; i++) if (uri.equalsIgnoreCase(XHTMLNS[i])) { return true; } return false; } public void fatalError(SAXParseException e) throws SAXException { if (grddlProfile || isHtmlMimetype()) { needTidy(); } reader.eHandler().fatalError(e); } public void error(SAXParseException e) { reader.eHandler().error(e); } public void warning(SAXParseException e) { reader.eHandler().warning(e); } private void checkLinkAttrs(Attributes attr) { String rel = getValueIgnoreCase(attr, "rel"); if (rel == null) return; String r[] = rel.split(" +"); for (int i = 0; i < r.length; i++) { if (r[i].equalsIgnoreCase("transformation")) addTransform(input.resolve(getValueIgnoreCase(attr, "href"))); } } private String getValueIgnoreCase(Attributes attr, String arg) { int ln = attr.getLength(); for (int i = 0; i < ln; i++) { if (arg.equalsIgnoreCase(attr.getQName(i))) return attr.getValue(i); } return null; } private void checkProfileAttrs(Attributes attr) { String profs = getValueIgnoreCase(attr, "profile"); if (profs == null) return; String p[] = profs.split("[ \t\n]+"); for (int i = 0; i < p.length; i++) { if (PROFILE.equals(p[i]) || NAMESPACE.equals(p[i])) { // System.err.println("Grddl profile"); grddlProfile = true; } else checkProfile(input.resolve(p[i])); } } private void checkRootAttrs(Attributes attr) { String xmlBase = attr.getValue("xml:base"); if (xmlBase != null) input.setBase(xmlBase); String transforms = attr.getValue(NAMESPACE, "transformation"); if (transforms == null) return; String t[] = transforms.split(" +"); for (int i = 0; i < t.length; i++) addTransform(input.resolve(t[i])); } public void startPrefixMapping(String prefix, String uri) throws SAXException { if (rootElement && (!grddlNamespace) && NAMESPACE.equals(uri)) { grddlNamespace = true; } } } private boolean isHtmlMimetype() { String mime = input.mimetype(); if (mime == null) return false; return mime.equalsIgnoreCase("text/html") || mime.equalsIgnoreCase("application/xhtml+xml"); } void addTransform(String tUrl) { trace("transform: " + tUrl); // if ("http://www.w3.org/2003/g/sq1t.xsl".equals(tUrl)) // tUrl = "http://www.w3.org/2001/sw/grddl-wg/td/sq1t.xsl"; // if ( tUrl.startsWith("http://www.w3.org/2003/g/") // && ( tUrl.contains("embeddedRDF") // || tUrl.contains("glean-profile") ) ) // tUrl = "http://lists.w3.org/Archives/Public/public-grddl-wg/2007Mar/att-0104/" // + // tUrl.substring("http://www.w3.org/2003/g/".length()); // transforms.add(tUrl); } private void checkProfile(String pUrl) { trace("profile: " + pUrl); profiles.add(pUrl); // reapTransforms(pUrl, ProfileTransformation); } private void reapTransforms() { Set<String> ss = schemas; Set<String> ps = profiles; reapTransforms(ss, NamespaceTransformation); reapTransforms(ps, ProfileTransformation); } private void reapTransforms(Set<String> ss, Property prop) { Iterator<String> it = ss.iterator(); while (it.hasNext()) reapTransforms(it.next(), prop); } private void reapTransforms(String pUrl, Property property) { // System.err.println("reaping: "+pUrl+" "+property.getURI()); StmtIterator it = getModel(pUrl).createResource(pUrl).listProperties(property); while (it.hasNext()) { RDFNode n = it.nextStatement().getObject(); if (n.isURIResource()) { // System.err.println("reaped: "+((Resource) n).getURI()); addTransform(((Resource) n).getURI()); } else { warning("Bad " + property.getLocalName() + "value in <" + pUrl + ">. No tranform applied for this value."); } } } private void warning(String string) { reader.eHandler().warning(new GRDDLWarningException(string)); } /* * private void addModel(String url) { model.read(url, "GRDDL"); } */ // static Map<String, Model> known = new HashMap<String, Model>(); static Cache known = CacheManager.createCache(CacheManager.RAND, "GRDDL schema/profile cache", 300); // static private boolean saveFlag; private Model getModel(String url) { Model m = (Model) known.get(url); if (m == null) { // if (url.equals(PROFILE)) // saveFlag = true; m = ModelFactory.createDefaultModel(); known.put(url, m); try { reader.read(m, url); } catch (GRDDLSecurityException e) { // escalate security issues throw e; } catch (Exception e) { // ignore anything else m = ModelFactory.createDefaultModel(); known.put(url, m); } // if (saveFlag) { // System.out.println("<!-- GRDDL of "+url+"-->"); // m.write(System.out); // } // if (url.equals(PROFILE)) // saveFlag = false; } return m; } public void needTidy() { trace("needTidy"); needTidy = true; throw new SeenEnoughExpectedException(); } private Set<String> schemas; private void checkSchema(String uri) { trace("schema: " + uri); if (uri.equals(RDF.getURI())) { addTransform("RDF/XML"); } else schemas.add(uri); } public static final Log logger = LogFactory.getLog(GRDDL.class); private void trace(String string) { // System.err.println(string); logger.trace(string); } void setHeaders(URLConnection conn) { ((GRDDLReaderBase) reader).setHeaders(conn); } } /* * (c) Copyright 2006 Hewlett-Packard Development Company, LP All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */