Example usage for javax.xml.parsers DocumentBuilder setEntityResolver

List of usage examples for javax.xml.parsers DocumentBuilder setEntityResolver

Introduction

On this page you can find example usages of javax.xml.parsers DocumentBuilder setEntityResolver.

Prototype


public abstract void setEntityResolver(EntityResolver er);

Source Link

Document

Specify the EntityResolver to be used to resolve entities present in the XML document to be parsed.

Usage

From source file:com.xpn.xwiki.pdf.impl.PdfExportImpl.java

/**
 * Cleans up an HTML document, turning it into valid XHTML.
 * /*from www.j  a  v  a  2  s .  com*/
 * @param input the source HTML to process
 * @return the cleaned up source
 */
private String convertToStrictXHtml(String input) {
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Cleaning HTML: " + input);
    }

    try {
        // First step, Tidy the document
        StringWriter tidyOutput = new StringWriter(input.length());
        this.tidy.parse(new StringReader(input), tidyOutput);

        // Tidy can't solve duplicate IDs, so it needs to be done manually
        DocumentBuilder docBuilder = dbFactory.newDocumentBuilder();
        docBuilder.setEntityResolver(Utils.getComponent(EntityResolver.class));
        String tidied = tidyOutput.toString().trim();
        if (StringUtils.isEmpty(tidied)) {
            tidied = input.trim();
        }
        Document doc = docBuilder.parse(new InputSource(new StringReader(tidied)));
        List<String> seenIDs = new ArrayList<String>();
        this.cleanIDs(doc.getDocumentElement(), seenIDs);

        // Write back the fixed document to a String
        LSOutput output = lsImpl.createLSOutput();
        StringWriter result = new StringWriter();
        output.setCharacterStream(result);
        LSSerializer serializer = lsImpl.createLSSerializer();
        serializer.setNewLine("\n");
        output.setEncoding(doc.getXmlEncoding());
        serializer.write(doc, output);
        return result.toString();
    } catch (Exception ex) {
        LOGGER.warn("Failed to tidy document for export: " + ex.getMessage(), ex);
        return input;
    }
}

From source file:com.adaptris.util.XmlUtils.java

/**
 * Creates a new {@link DocumentBuilder} from the shared factory and, when an entity resolver
 * has been configured, installs it on the builder.
 *
 * @return a freshly created builder
 * @throws ParserConfigurationException if the factory cannot create a builder
 */
private DocumentBuilder documentBuilder() throws ParserConfigurationException {
    final DocumentBuilder result = docBuilderFactory.newDocumentBuilder();
    if (entityResolver == null) {
        // No resolver configured: leave the builder's default resolution in place.
        return result;
    }
    result.setEntityResolver(entityResolver);
    return result;
}

From source file:com.litwan.yanel.impl.resources.svg.SvgEditResource.java

/**
 * Checks if InputStream is wellformed/* ww  w. j  a  v  a  2  s.co  m*/
 * @return boolean true if wellformed, false if not
 * @param InputStream which is checked if wellformed
 * @throws UsecaseException
 */
private boolean isWellformed(InputStream is) throws UsecaseException {
    try {
        //TODO: code borrowed from YanelServlet.java r40436. see line 902. 1. maybe there is a better way to do so. 2. this code could maybe be refactored into a some xml.util lib. 
        javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory.newInstance();
        javax.xml.parsers.DocumentBuilder parser = dbf.newDocumentBuilder();
        // NOTE: DOCTYPE is being resolved/retrieved (e.g. xhtml schema from w3.org) also
        //       if isValidating is set to false.
        //       Hence, for performance and network reasons we use a local catalog ...
        //       Also see http://www.xml.com/pub/a/2004/03/03/catalogs.html
        //       resp. http://xml.apache.org/commons/components/resolver/
        // TODO: What about a resolver factory?
        parser.setEntityResolver(new CatalogResolver());
        parser.parse(is);
        return true;
    } catch (org.xml.sax.SAXException e) {
        addError("Document is not wellformed: " + e.getMessage() + " ");
        return false;
    } catch (Exception e) {
        addError(e.getMessage());
        return false;
    }
}

From source file:com.panet.imeta.core.xml.XMLHandler.java

/**
 * Load a file into an XML document/*w  w  w .ja v a  2  s. c o m*/
 * 
 * @param filename
 *            The filename to load into a document
 * @param systemId
 *            Provide a base for resolving relative URIs.
 * @param ignoreEntities
 *            Ignores external entities and returns an empty dummy.
 * @param namespaceAware
 *            support XML namespaces.
 * @return the Document if all went well, null if an error occured!
 */
public static final Document loadXMLFile(FileObject fileObject, String systemID, boolean ignoreEntities,
        boolean namespaceAware) throws KettleXMLException {
    DocumentBuilderFactory dbf;
    DocumentBuilder db;
    Document doc;

    try {
        // Check and open XML document
        dbf = DocumentBuilderFactory.newInstance();
        dbf.setIgnoringComments(true);
        dbf.setNamespaceAware(namespaceAware);
        db = dbf.newDocumentBuilder();
        // even dbf.setValidating(false) will the parser NOT prevent from
        // checking the existance of the DTD
        // thus we need to give the BaseURI (systemID) below to have a
        // chance to get it
        // or return empty dummy documents for all external entities
        // (sources)
        if (ignoreEntities)
            db.setEntityResolver(new DTDIgnoringEntityResolver());
        InputStream inputStream = null;
        try {
            if (Const.isEmpty(systemID)) {
                // Normal parsing
                //
                inputStream = KettleVFS.getInputStream(fileObject);
                doc = db.parse(inputStream);
            } else {
                // Do extra verifications
                //
                String systemIDwithEndingSlash = systemID.trim();
                // make sure we have an ending slash, otherwise the last
                // part will be ignored
                if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) {
                    systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/");
                }
                inputStream = KettleVFS.getInputStream(fileObject);
                doc = db.parse(inputStream, systemIDwithEndingSlash);
            }
        } catch (FileNotFoundException ef) {
            throw new KettleXMLException(ef);
        } finally {
            if (inputStream != null)
                inputStream.close();
        }

        return doc;
    } catch (Exception e) {
        throw new KettleXMLException("Error reading information from file", e);
    }
}

From source file:com.autentia.mvn.plugin.changes.BugzillaChangesMojo.java

/**
 * Gets bugs XML document from Bugzilla.
 * /*from   w w  w  .j  a v  a  2  s  .c  o  m*/
 * @param client
 * @param bugsIds
 * @return
 * @throws MojoExecutionException
 */
private Document getBugsDocument(final HttpClient client, final String bugsIds) throws MojoExecutionException {
    final String link = this.bugzillaUrl + SHOWBUG_URL;
    try {
        final byte[] response = this.httpRequest.sendPostRequest(client, link, bugsIds);

        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        final DocumentBuilder db = dbf.newDocumentBuilder();
        db.setEntityResolver(new EntityResolver() {

            public InputSource resolveEntity(final String publicId, final String systemId)
                    throws SAXException, IOException {
                return new InputSource(
                        this.getClass().getClassLoader().getResourceAsStream("bugzilla3/bugzilla.dtd"));
            }
        });
        final ByteArrayInputStream bais = new ByteArrayInputStream(response);

        final Document docBugzilla = db.parse(bais);
        this.cleanBugzillaDocument(docBugzilla);
        return docBugzilla;
    } catch (final HttpStatusException e) {
        this.getLog().warn("Can not recover bugs in XML", e);
        throw new MojoExecutionException("Can not recover bugs in XML.", e);
    } catch (final IOException e) {
        this.getLog().warn("Can not recover bugs in XML", e);
        throw new MojoExecutionException("Can not recover bugs in XML.", e);
    } catch (final ParserConfigurationException e) {
        this.getLog().warn("Can not parse XML bugs", e);
        throw new MojoExecutionException("Can not parse XML bugs.", e);
    } catch (final SAXException e) {
        this.getLog().warn("Can not build bugs XML document", e);
        throw new MojoExecutionException("Can not build bugs XML document.", e);
    }
}

From source file:com.panet.imeta.core.xml.XMLHandler.java

/**
 * Load a file into an XML document/* ww  w .  j  a  v  a  2 s . c om*/
 * 
 * @param inputStream
 *            The stream to load a document from
 * @param systemId
 *            Provide a base for resolving relative URIs.
 * @param ignoreEntities
 *            Ignores external entities and returns an empty dummy.
 * @param namespaceAware
 *            support XML namespaces.
 * @return the Document if all went well, null if an error occured!
 */
public static final Document loadXMLFile(InputStream inputStream, String systemID, boolean ignoreEntities,
        boolean namespaceAware) throws KettleXMLException {
    DocumentBuilderFactory dbf;
    DocumentBuilder db;
    Document doc;

    try {
        // Check and open XML document
        //
        dbf = DocumentBuilderFactory.newInstance();
        dbf.setIgnoringComments(true);
        dbf.setNamespaceAware(namespaceAware);
        db = dbf.newDocumentBuilder();

        // even dbf.setValidating(false) will the parser NOT prevent from
        // checking the existance of the DTD
        // thus we need to give the BaseURI (systemID) below to have a
        // chance to get it
        // or return empty dummy documents for all external entities
        // (sources)
        //
        if (ignoreEntities) {
            db.setEntityResolver(new DTDIgnoringEntityResolver());
        }

        try {
            if (Const.isEmpty(systemID)) {
                // Normal parsing
                //
                doc = db.parse(inputStream);
            } else {
                // Do extra verifications
                //
                String systemIDwithEndingSlash = systemID.trim();

                // make sure we have an ending slash, otherwise the last
                // part will be ignored
                //
                if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) {
                    systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/");
                }
                doc = db.parse(inputStream, systemIDwithEndingSlash);
            }
        } catch (FileNotFoundException ef) {
            throw new KettleXMLException(ef);
        } finally {
            if (inputStream != null)
                inputStream.close();
        }

        return doc;
    } catch (Exception e) {
        throw new KettleXMLException("Error reading information from input stream", e);
    }
}

From source file:com.meidusa.amoeba.context.ProxyRuntimeContext.java

/**
 * Builds a validating, non-namespace-aware XML parser and uses it to load the given
 * configuration file.
 * <p>
 * References to {@code amoeba.dtd} are served from the class path; validation errors and fatal
 * errors are logged with their position and abort the load.
 *
 * @param configFileName the configuration file to load
 * @return the configuration produced by {@code loadConfigurationFile}
 * @throws ConfigurationException if the parser cannot be set up or loading fails
 */
private ProxyServerConfig loadConfig(String configFileName) {
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(true);
        factory.setNamespaceAware(false);

        DocumentBuilder builder = factory.newDocumentBuilder();

        builder.setEntityResolver(new EntityResolver() {

            public InputSource resolveEntity(String publicId, String systemId) {
                // Only the amoeba DTD is handled here; everything else is left to the parser.
                if (!systemId.endsWith("amoeba.dtd")) {
                    return null;
                }

                InputStream dtd = ProxyRuntimeContext.class
                        .getResourceAsStream("/com/meidusa/amoeba/xml/amoeba.dtd");
                if (dtd != null) {
                    return new InputSource(dtd);
                }

                LogLog.error("Could not find [amoeba.dtd]. Used ["
                        + ProxyRuntimeContext.class.getClassLoader() + "] class loader in the search.");
                return null;
            }
        });

        builder.setErrorHandler(new ErrorHandler() {

            public void warning(SAXParseException exception) {
                // Warnings are deliberately ignored.
            }

            public void error(SAXParseException exception) throws SAXException {
                String described = exception.getMessage() + " at (" + exception.getLineNumber() + ":"
                        + exception.getColumnNumber() + ")";
                logger.error(described);
                throw exception;
            }

            public void fatalError(SAXParseException exception) throws SAXException {
                String described = exception.getMessage() + " at (" + exception.getLineNumber() + ":"
                        + exception.getColumnNumber() + ")";
                logger.fatal(described);
                throw exception;
            }
        });

        return loadConfigurationFile(configFileName, builder);
    } catch (Exception e) {
        logger.fatal("Could not load configuration file, failing", e);
        throw new ConfigurationException("Error loading configuration file " + configFileName, e);
    }
}

From source file:de.betterform.xml.xforms.XFormsProcessorImpl.java

/**
 * Creates a namespace-aware, non-validating {@link DocumentBuilder} with external DTD loading
 * switched off.
 *
 * @return the configured builder
 * @throws XFormsException wrapping any failure while configuring or creating the builder
 */
private DocumentBuilder getDocumentBuilder() throws XFormsException {
    // ensure xerces dom
    try {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        dbf.setValidating(false);
        // Keeps the parser from fetching the external DTD (can cause latency problems).
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // dbf.setAttribute("http://apache.org/xml/properties/dom/document-class-name", "org.apache.xerces.dom.DocumentImpl");

        DocumentBuilder builder = dbf.newDocumentBuilder();

        // NOTE(review): this resolver always returns null, which per the SAX EntityResolver
        // contract asks the parser to fall back to its default resolution. It does not by
        // itself block downloads; the feature flag above is what disables DTD loading.
        EntityResolver deferringResolver = new EntityResolver() {
            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                return null;
            }
        };
        builder.setEntityResolver(deferringResolver);
        return builder;
    } catch (Exception e) {
        throw new XFormsException(e);
    }
}

From source file:ambit.data.qmrf.QMRFObject.java

/**
 * Parses the given source into a DOM document.
 *
 * @param source the XML input to parse
 * @param validating currently unused: the factory is left at its default settings
 * @param resolver entity resolver to install on the parser, or {@code null} to keep the default
 * @return the parsed document
 * @throws Exception if the parser cannot be created or the source cannot be parsed
 */
public Document readDocument(InputSource source, boolean validating, EntityResolver resolver) throws Exception {
    // Namespace awareness and validation are not configured here; the factory defaults apply.
    final DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    parser.setErrorHandler(new SimpleErrorHandler(getClass().getName()));

    // Install the entity resolver only when the caller supplied one.
    if (resolver != null) {
        parser.setEntityResolver(resolver);
    }

    return parser.parse(source);
}

From source file:ambit2.data.qmrf.QMRFObject.java

/**
  * Converts FO DOM Document into PDF (uses Apache FOP).
  * @param foDom  Document FO namespace xmlns:fo="http://www.w3.org/1999/XSL/Format"
  * @param pdf
        
 protected void convertDOM2PDF(Document foDom, OutputStream pdf) {
try {
           
            
    // configure fopFactory as desired
    FopFactory fopFactory = FopFactory.newInstance();
            
        
    String foNS = "http://www.w3.org/1999/XSL/Format";           
    FOUserAgent foUserAgent = fopFactory.newFOUserAgent();
    // configure foUserAgent as desired
         
    // Setup output
            
    BufferedOutputStream  out = new java.io.BufferedOutputStream(pdf);
         
    try {
        // Construct fop with desired output format and output stream
        Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, out);
                
        // Setup Identity Transformer
        TransformerFactory xfactory = TransformerFactory.newInstance();
        Transformer transformer = xfactory.newTransformer(); // identity transformer
                
        // Setup input for XSLT transformation
        Source src = new DOMSource(foDom);
                
        // Resulting SAX events (the generated FO) must be piped through to FOP
        Result res = new SAXResult(fop.getDefaultHandler());
                
        // Start XSLT transformation and FOP processing
        transformer.transform(src, res);
    } finally {
        out.close();
    }
            
} catch (Exception e) {
    e.printStackTrace(System.err);
    System.exit(-1);
}
        
 }
 */
/*
 * Needs Xerces Parser - to verify if can work with other SAX parsers
public void readSchema(InputStream in) throws Exception {
    SAXParser parser = new SAXParser();
    DeclHandler handler = new CustomDeclHandler(this);
    parser.setProperty("http://xml.org/sax/properties/declaration-handler",
    handler);
    String filename = "ambit/data/qmrf/qmrf.dtd";
    //InputStream stream = this.getClass().getClassLoader().getResourceAsStream(filename);
    InputStream stream = new FileInputStream("qmrf_empty.xml");
    parser.parse(new InputSource(stream));
    stream.close();
        
}
*/
/**
 * Parses the given source into a DOM document.
 *
 * @param source the XML input to parse
 * @param validating currently unused: the factory is left at its default settings
 * @param resolver entity resolver to install on the parser, or {@code null} to keep the default
 * @return the parsed document
 * @throws Exception if the parser cannot be created or the source cannot be parsed
 */
public Document readDocument(InputSource source, boolean validating, EntityResolver resolver) throws Exception {
    // Namespace awareness and validation are not configured here; the factory defaults apply.
    final DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    parser.setErrorHandler(new SimpleErrorHandler(getClass().getName()));

    // Install the entity resolver only when the caller supplied one.
    if (resolver != null) {
        parser.setEntityResolver(resolver);
    }

    return parser.parse(source);
}