List of usage examples for javax.xml.parsers.DocumentBuilder#setEntityResolver
// Signature of the API method whose usages are collected in the examples below.
public abstract void setEntityResolver(EntityResolver er);
From source file:com.xpn.xwiki.pdf.impl.PdfExportImpl.java
/** * Cleans up an HTML document, turning it into valid XHTML. * /*from www.j a v a 2 s . com*/ * @param input the source HTML to process * @return the cleaned up source */ private String convertToStrictXHtml(String input) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Cleaning HTML: " + input); } try { // First step, Tidy the document StringWriter tidyOutput = new StringWriter(input.length()); this.tidy.parse(new StringReader(input), tidyOutput); // Tidy can't solve duplicate IDs, so it needs to be done manually DocumentBuilder docBuilder = dbFactory.newDocumentBuilder(); docBuilder.setEntityResolver(Utils.getComponent(EntityResolver.class)); String tidied = tidyOutput.toString().trim(); if (StringUtils.isEmpty(tidied)) { tidied = input.trim(); } Document doc = docBuilder.parse(new InputSource(new StringReader(tidied))); List<String> seenIDs = new ArrayList<String>(); this.cleanIDs(doc.getDocumentElement(), seenIDs); // Write back the fixed document to a String LSOutput output = lsImpl.createLSOutput(); StringWriter result = new StringWriter(); output.setCharacterStream(result); LSSerializer serializer = lsImpl.createLSSerializer(); serializer.setNewLine("\n"); output.setEncoding(doc.getXmlEncoding()); serializer.write(doc, output); return result.toString(); } catch (Exception ex) { LOGGER.warn("Failed to tidy document for export: " + ex.getMessage(), ex); return input; } }
From source file:com.adaptris.util.XmlUtils.java
private DocumentBuilder documentBuilder() throws ParserConfigurationException { DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); if (entityResolver != null) { builder.setEntityResolver(entityResolver); }// www . j av a2 s.c o m return builder; }
From source file:com.litwan.yanel.impl.resources.svg.SvgEditResource.java
/** * Checks if InputStream is wellformed/* ww w. j a v a 2 s.co m*/ * @return boolean true if wellformed, false if not * @param InputStream which is checked if wellformed * @throws UsecaseException */ private boolean isWellformed(InputStream is) throws UsecaseException { try { //TODO: code borrowed from YanelServlet.java r40436. see line 902. 1. maybe there is a better way to do so. 2. this code could maybe be refactored into a some xml.util lib. javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory.newInstance(); javax.xml.parsers.DocumentBuilder parser = dbf.newDocumentBuilder(); // NOTE: DOCTYPE is being resolved/retrieved (e.g. xhtml schema from w3.org) also // if isValidating is set to false. // Hence, for performance and network reasons we use a local catalog ... // Also see http://www.xml.com/pub/a/2004/03/03/catalogs.html // resp. http://xml.apache.org/commons/components/resolver/ // TODO: What about a resolver factory? parser.setEntityResolver(new CatalogResolver()); parser.parse(is); return true; } catch (org.xml.sax.SAXException e) { addError("Document is not wellformed: " + e.getMessage() + " "); return false; } catch (Exception e) { addError(e.getMessage()); return false; } }
From source file:com.panet.imeta.core.xml.XMLHandler.java
/** * Load a file into an XML document/*w w w .ja v a 2 s. c o m*/ * * @param filename * The filename to load into a document * @param systemId * Provide a base for resolving relative URIs. * @param ignoreEntities * Ignores external entities and returns an empty dummy. * @param namespaceAware * support XML namespaces. * @return the Document if all went well, null if an error occured! */ public static final Document loadXMLFile(FileObject fileObject, String systemID, boolean ignoreEntities, boolean namespaceAware) throws KettleXMLException { DocumentBuilderFactory dbf; DocumentBuilder db; Document doc; try { // Check and open XML document dbf = DocumentBuilderFactory.newInstance(); dbf.setIgnoringComments(true); dbf.setNamespaceAware(namespaceAware); db = dbf.newDocumentBuilder(); // even dbf.setValidating(false) will the parser NOT prevent from // checking the existance of the DTD // thus we need to give the BaseURI (systemID) below to have a // chance to get it // or return empty dummy documents for all external entities // (sources) if (ignoreEntities) db.setEntityResolver(new DTDIgnoringEntityResolver()); InputStream inputStream = null; try { if (Const.isEmpty(systemID)) { // Normal parsing // inputStream = KettleVFS.getInputStream(fileObject); doc = db.parse(inputStream); } else { // Do extra verifications // String systemIDwithEndingSlash = systemID.trim(); // make sure we have an ending slash, otherwise the last // part will be ignored if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) { systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/"); } inputStream = KettleVFS.getInputStream(fileObject); doc = db.parse(inputStream, systemIDwithEndingSlash); } } catch (FileNotFoundException ef) { throw new KettleXMLException(ef); } finally { if (inputStream != null) inputStream.close(); } return doc; } catch (Exception e) { throw new KettleXMLException("Error reading information from file", e); } }
From source file:com.autentia.mvn.plugin.changes.BugzillaChangesMojo.java
/** * Gets bugs XML document from Bugzilla. * /*from w w w .j a v a 2 s .c o m*/ * @param client * @param bugsIds * @return * @throws MojoExecutionException */ private Document getBugsDocument(final HttpClient client, final String bugsIds) throws MojoExecutionException { final String link = this.bugzillaUrl + SHOWBUG_URL; try { final byte[] response = this.httpRequest.sendPostRequest(client, link, bugsIds); final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); final DocumentBuilder db = dbf.newDocumentBuilder(); db.setEntityResolver(new EntityResolver() { public InputSource resolveEntity(final String publicId, final String systemId) throws SAXException, IOException { return new InputSource( this.getClass().getClassLoader().getResourceAsStream("bugzilla3/bugzilla.dtd")); } }); final ByteArrayInputStream bais = new ByteArrayInputStream(response); final Document docBugzilla = db.parse(bais); this.cleanBugzillaDocument(docBugzilla); return docBugzilla; } catch (final HttpStatusException e) { this.getLog().warn("Can not recover bugs in XML", e); throw new MojoExecutionException("Can not recover bugs in XML.", e); } catch (final IOException e) { this.getLog().warn("Can not recover bugs in XML", e); throw new MojoExecutionException("Can not recover bugs in XML.", e); } catch (final ParserConfigurationException e) { this.getLog().warn("Can not parse XML bugs", e); throw new MojoExecutionException("Can not parse XML bugs.", e); } catch (final SAXException e) { this.getLog().warn("Can not build bugs XML document", e); throw new MojoExecutionException("Can not build bugs XML document.", e); } }
From source file:com.panet.imeta.core.xml.XMLHandler.java
/** * Load a file into an XML document/* ww w . j a v a 2 s . c om*/ * * @param inputStream * The stream to load a document from * @param systemId * Provide a base for resolving relative URIs. * @param ignoreEntities * Ignores external entities and returns an empty dummy. * @param namespaceAware * support XML namespaces. * @return the Document if all went well, null if an error occured! */ public static final Document loadXMLFile(InputStream inputStream, String systemID, boolean ignoreEntities, boolean namespaceAware) throws KettleXMLException { DocumentBuilderFactory dbf; DocumentBuilder db; Document doc; try { // Check and open XML document // dbf = DocumentBuilderFactory.newInstance(); dbf.setIgnoringComments(true); dbf.setNamespaceAware(namespaceAware); db = dbf.newDocumentBuilder(); // even dbf.setValidating(false) will the parser NOT prevent from // checking the existance of the DTD // thus we need to give the BaseURI (systemID) below to have a // chance to get it // or return empty dummy documents for all external entities // (sources) // if (ignoreEntities) { db.setEntityResolver(new DTDIgnoringEntityResolver()); } try { if (Const.isEmpty(systemID)) { // Normal parsing // doc = db.parse(inputStream); } else { // Do extra verifications // String systemIDwithEndingSlash = systemID.trim(); // make sure we have an ending slash, otherwise the last // part will be ignored // if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) { systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/"); } doc = db.parse(inputStream, systemIDwithEndingSlash); } } catch (FileNotFoundException ef) { throw new KettleXMLException(ef); } finally { if (inputStream != null) inputStream.close(); } return doc; } catch (Exception e) { throw new KettleXMLException("Error reading information from input stream", e); } }
From source file:com.meidusa.amoeba.context.ProxyRuntimeContext.java
/**
 * Loads the proxy server configuration from the given file using a validating,
 * non-namespace-aware DOM parser.
 * <p>
 * System identifiers ending in {@code amoeba.dtd} are served from the classpath copy of the
 * DTD; parser errors and fatal errors are logged with their position and rethrown, warnings
 * are ignored.
 *
 * @param configFileName name of the configuration file to load
 * @return the configuration produced by {@code loadConfigurationFile}
 * @throws ConfigurationException if anything goes wrong while building the parser or loading
 */
private ProxyServerConfig loadConfig(String configFileName) {
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(true);
        factory.setNamespaceAware(false);

        DocumentBuilder builder = factory.newDocumentBuilder();

        builder.setEntityResolver(new EntityResolver() {

            public InputSource resolveEntity(String publicId, String systemId) {
                // Anything other than the amoeba DTD is left to the parser's default handling
                if (!systemId.endsWith("amoeba.dtd")) {
                    return null;
                }

                InputStream dtd = ProxyRuntimeContext.class
                        .getResourceAsStream("/com/meidusa/amoeba/xml/amoeba.dtd");
                if (dtd != null) {
                    return new InputSource(dtd);
                }

                LogLog.error("Could not find [amoeba.dtd]. Used ["
                        + ProxyRuntimeContext.class.getClassLoader() + "] class loader in the search.");
                return null;
            }
        });

        builder.setErrorHandler(new ErrorHandler() {

            public void warning(SAXParseException exception) {
                // warnings are deliberately ignored
            }

            public void error(SAXParseException exception) throws SAXException {
                String report = exception.getMessage() + " at (" + exception.getLineNumber() + ":"
                        + exception.getColumnNumber() + ")";
                logger.error(report);
                throw exception;
            }

            public void fatalError(SAXParseException exception) throws SAXException {
                String report = exception.getMessage() + " at (" + exception.getLineNumber() + ":"
                        + exception.getColumnNumber() + ")";
                logger.fatal(report);
                throw exception;
            }
        });

        return loadConfigurationFile(configFileName, builder);
    } catch (Exception e) {
        logger.fatal("Could not load configuration file, failing", e);
        throw new ConfigurationException("Error loading configuration file " + configFileName, e);
    }
}
From source file:de.betterform.xml.xforms.XFormsProcessorImpl.java
private DocumentBuilder getDocumentBuilder() throws XFormsException { // ensure xerces dom try {// w w w .j a v a2 s . c o m DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); factory.setValidating(false); factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); // factory.setAttribute("http://apache.org/xml/properties/dom/document-class-name", "org.apache.xerces.dom.DocumentImpl"); DocumentBuilder db = factory.newDocumentBuilder(); // use an empty entity resolver to avoid that Xerces may try to // download the system DTD (can cause latency problems) db.setEntityResolver(new EntityResolver() { public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { return null; }; }); return db; } catch (Exception e) { throw new XFormsException(e); } }
From source file:ambit.data.qmrf.QMRFObject.java
public Document readDocument(InputSource source, boolean validating, EntityResolver resolver) throws Exception { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); /*// w ww. j a v a 2 s . com factory.setNamespaceAware(true); factory.setValidating(validating); */ DocumentBuilder builder = factory.newDocumentBuilder(); builder.setErrorHandler(new SimpleErrorHandler(getClass().getName())); // Install the entity resolver if (resolver != null) builder.setEntityResolver(resolver); return builder.parse(source); }
From source file:ambit2.data.qmrf.QMRFObject.java
/** * Converts FO DOM Document into PDF (uses Apache FOP). * @param foDom Document FO namespace xmlns:fo="http://www.w3.org/1999/XSL/Format" * @param pdf//w w w . j av a2s .c o m protected void convertDOM2PDF(Document foDom, OutputStream pdf) { try { // configure fopFactory as desired FopFactory fopFactory = FopFactory.newInstance(); String foNS = "http://www.w3.org/1999/XSL/Format"; FOUserAgent foUserAgent = fopFactory.newFOUserAgent(); // configure foUserAgent as desired // Setup output BufferedOutputStream out = new java.io.BufferedOutputStream(pdf); try { // Construct fop with desired output format and output stream Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, out); // Setup Identity Transformer TransformerFactory xfactory = TransformerFactory.newInstance(); Transformer transformer = xfactory.newTransformer(); // identity transformer // Setup input for XSLT transformation Source src = new DOMSource(foDom); // Resulting SAX events (the generated FO) must be piped through to FOP Result res = new SAXResult(fop.getDefaultHandler()); // Start XSLT transformation and FOP processing transformer.transform(src, res); } finally { out.close(); } } catch (Exception e) { e.printStackTrace(System.err); System.exit(-1); } } */ /* * Needs Xerces Parser - to verify if can work with other SAX parsers public void readSchema(InputStream in) throws Exception { SAXParser parser = new SAXParser(); DeclHandler handler = new CustomDeclHandler(this); parser.setProperty("http://xml.org/sax/properties/declaration-handler", handler); String filename = "ambit/data/qmrf/qmrf.dtd"; //InputStream stream = this.getClass().getClassLoader().getResourceAsStream(filename); InputStream stream = new FileInputStream("qmrf_empty.xml"); parser.parse(new InputSource(stream)); stream.close(); } */ public Document readDocument(InputSource source, boolean validating, EntityResolver resolver) throws Exception { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); /* 
factory.setNamespaceAware(true); factory.setValidating(validating); */ DocumentBuilder builder = factory.newDocumentBuilder(); builder.setErrorHandler(new SimpleErrorHandler(getClass().getName())); // Install the entity resolver if (resolver != null) builder.setEntityResolver(resolver); return builder.parse(source); }