Example usage for javax.xml.parsers DocumentBuilderFactory setCoalescing

List of usage examples for javax.xml.parsers DocumentBuilderFactory setCoalescing

Introduction

On this page you can find example usages of javax.xml.parsers.DocumentBuilderFactory.setCoalescing.

Prototype


public void setCoalescing(boolean coalescing) 

Source Link

Document

Specifies that the parser produced by this code will convert CDATA nodes to Text nodes and append them to the adjacent text node, if any.

Usage

From source file:net.sourceforge.pmd.lang.xml.ast.XmlParser.java

/**
 * Parses the XML supplied by {@code reader} into a DOM {@link Document}.
 *
 * <p>The input is read completely into a string first, because the same text
 * is needed twice: once for the parser and once for {@code DOMLineNumbers}.
 * The parser factory is configured from {@code parserOptions}; external
 * general and external parameter entities are always switched off.</p>
 *
 * @param reader source of the XML text
 * @return the parsed document, after {@code DOMLineNumbers.determine()} has been run for it
 * @throws ParseException wrapping any parser-configuration, SAX or I/O failure
 */
protected Document parseDocument(Reader reader) throws ParseException {
    nodeCache.clear();
    try {
        final String xmlText = IOUtils.toString(reader);

        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        // Settings taken from the configured parser options.
        factory.setNamespaceAware(parserOptions.isNamespaceAware());
        factory.setValidating(parserOptions.isValidating());
        factory.setIgnoringComments(parserOptions.isIgnoringComments());
        factory.setIgnoringElementContentWhitespace(parserOptions.isIgnoringElementContentWhitespace());
        factory.setExpandEntityReferences(parserOptions.isExpandEntityReferences());
        factory.setCoalescing(parserOptions.isCoalescing());
        factory.setXIncludeAware(parserOptions.isXincludeAware());

        // Fixed settings: never resolve external entities, whatever the options say.
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

        final DocumentBuilder builder = factory.newDocumentBuilder();
        builder.setEntityResolver(parserOptions.getEntityResolver());

        final Document parsed = builder.parse(new InputSource(new StringReader(xmlText)));
        new DOMLineNumbers(parsed, xmlText).determine();
        return parsed;
    } catch (ParserConfigurationException | SAXException | IOException e) {
        throw new ParseException(e);
    }
}

From source file:nl.nn.adapterframework.util.XmlUtils.java

/**
 * Parses {@code input} with a coalescing parser and hands the resulting
 * document to {@code nodeToString}.
 *
 * <p>The factory obtained from {@code getDocumentBuilderFactory()} has
 * coalescing switched on before the builder is created.</p>
 *
 * @param input XML text to parse
 * @return whatever {@code nodeToString} returns for the parsed document, or
 *         {@code null} if any exception occurs (all exceptions are swallowed)
 */
public static String cdataToText(String input) {
    try {
        DocumentBuilderFactory coalescingFactory = getDocumentBuilderFactory();
        coalescingFactory.setCoalescing(true);
        InputSource source = new InputSource(new StringReader(input));
        Document parsed = coalescingFactory.newDocumentBuilder().parse(source);
        return nodeToString(parsed);
    } catch (Exception e) {
        // Best effort: any failure is reported to the caller as null.
        return null;
    }
}

From source file:org.apache.rahas.impl.util.SAMLUtilsTest.java

/**
 * Compares two XML documents, given as strings, for node equality.
 *
 * <p>Both strings are parsed with a namespace-aware, coalescing parser that
 * ignores comments and element-content whitespace; each document is then
 * normalized and the two are compared with {@link Document#isEqualNode}.</p>
 *
 * @param element1 first XML document, as text
 * @param element2 second XML document, as text
 * @return {@code true} if the two parsed documents are equal nodes
 * @throws ParserConfigurationException if a document builder cannot be created
 * @throws IOException if reading either input fails
 * @throws SAXException if either input is not well-formed XML
 */
private static boolean equals(String element1, String element2)
        throws ParserConfigurationException, IOException, SAXException {

    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    dbf.setCoalescing(true);
    dbf.setIgnoringElementContentWhitespace(true);
    dbf.setIgnoringComments(true);
    DocumentBuilder db = dbf.newDocumentBuilder();

    Document doc1 = db.parse(new ByteArrayInputStream(element1.getBytes("UTF-8")));
    doc1.normalizeDocument();

    // Parse the SECOND argument here. The previous code parsed element1 twice,
    // so the comparison was always a document against itself.
    Document doc2 = db.parse(new ByteArrayInputStream(element2.getBytes("UTF-8")));
    doc2.normalizeDocument();

    return doc1.isEqualNode(doc2);
}

From source file:org.codelibs.robot.transformer.impl.XmlTransformer.java

/**
 * Parses the response body as XML and builds the result data from it.
 *
 * <p>The response body is written to a file, parsed with a
 * {@link DocumentBuilderFactory} configured from this transformer's
 * attribute map, feature map and boolean settings, and then every entry of
 * {@code fieldRuleMap} is evaluated against the document. The collected
 * text is framed by the header, additional data and footer, and encoded
 * with {@code charsetName}. If that charset name is unsupported,
 * {@code charsetName} is reset to UTF-8 and UTF-8 is used instead.</p>
 *
 * @param responseData response whose body is transformed
 * @return the result data carrying the encoded text, the transformer name and the encoding
 * @throws RobotCrawlAccessException if {@code responseData} or its body is {@code null}
 * @throws RobotSystemException if anything fails while parsing or building the result
 */
@Override
public ResultData transform(final ResponseData responseData) {
    if (responseData == null || responseData.getResponseBody() == null) {
        throw new RobotCrawlAccessException("No response body.");
    }

    final File bodyFile = ResponseDataUtil.createResponseBodyFile(responseData);
    FileInputStream bodyStream = null;

    try {
        bodyStream = new FileInputStream(bodyFile);

        // Configure the parser factory: free-form attributes and features
        // first, then the dedicated boolean switches.
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        for (final Map.Entry<String, Object> attribute : attributeMap.entrySet()) {
            dbf.setAttribute(attribute.getKey(), attribute.getValue());
        }
        for (final Map.Entry<String, String> feature : featureMap.entrySet()) {
            dbf.setFeature(feature.getKey(), Boolean.parseBoolean(feature.getValue()));
        }
        dbf.setCoalescing(coalescing);
        dbf.setExpandEntityReferences(expandEntityRef);
        dbf.setIgnoringComments(ignoringComments);
        dbf.setIgnoringElementContentWhitespace(ignoringElementContentWhitespace);
        dbf.setNamespaceAware(namespaceAware);
        dbf.setValidating(validating);
        dbf.setXIncludeAware(includeAware);

        final Document xmlDoc = dbf.newDocumentBuilder().parse(bodyStream);

        final StringBuilder out = new StringBuilder(1000);
        out.append(getResultDataHeader());
        for (final Map.Entry<String, String> rule : fieldRuleMap.entrySet()) {
            final String fieldName = rule.getKey();
            final String expression = rule.getValue();

            // Collect the text content of every node selected by this rule.
            final List<String> values = new ArrayList<String>();
            try {
                final NodeList matches = getNodeList(xmlDoc, expression);
                for (int idx = 0; idx < matches.getLength(); idx++) {
                    final Node match = matches.item(idx);
                    values.add(match.getTextContent());
                }
            } catch (final TransformerException e) {
                logger.warn("Could not parse a value of " + fieldName + ":" + expression, e);
            }

            // A single value is written as a scalar, several as a list,
            // and a rule without matches produces no output.
            final int found = values.size();
            if (found > 1) {
                out.append(getResultDataBody(fieldName, values));
            } else if (found == 1) {
                out.append(getResultDataBody(fieldName, values.get(0)));
            }
        }
        out.append(getAdditionalData(responseData, xmlDoc));
        out.append(getResultDataFooter());

        final ResultData result = new ResultData();
        result.setTransformerName(getName());

        final String text = out.toString();
        try {
            result.setData(text.getBytes(charsetName));
        } catch (final UnsupportedEncodingException e) {
            if (logger.isInfoEnabled()) {
                logger.info("Invalid charsetName: " + charsetName + ". Changed to " + Constants.UTF_8, e);
            }
            // Fall back to UTF-8 and remember it for later calls.
            charsetName = Constants.UTF_8_CHARSET.name();
            result.setData(text.getBytes(Constants.UTF_8_CHARSET));
        }
        result.setEncoding(charsetName);

        return result;
    } catch (final RobotSystemException e) {
        // Already the right exception type: pass through unwrapped.
        throw e;
    } catch (final Exception e) {
        throw new RobotSystemException("Could not store data.", e);
    } finally {
        IOUtils.closeQuietly(bodyStream);
        // Clean up the file created for the response body.
        if (!bodyFile.delete()) {
            logger.warn("Could not delete a temp file: " + bodyFile);
        }
    }
}

From source file:org.cytobank.acs.core.TableOfContents.java

/**
 * Creates a {@link DocumentBuilder} with Cytobank's preferred security
 * settings applied: DOCTYPE declarations are disallowed, and external
 * entities and external DTDs are switched off, to prevent XML External
 * Entity (XXE) exploits.
 *
 * <p>The builder is namespace-aware and non-validating; comments and
 * element-content whitespace are kept, CDATA sections are not coalesced,
 * and entity references are not expanded.</p>
 *
 * @return a new, hardened {@code DocumentBuilder}
 * @throws ParserConfigurationException if the parser rejects one of the
 *         requested features or a builder cannot be created
 */
protected DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    // PRIMARY defense: if DTDs (doctypes) are disallowed, almost all XML
    // entity attacks are prevented.
    // Xerces 2 only - http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl
    factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

    // Second line of defense, for setups where DTDs cannot be disabled completely.
    // Xerces 1 - http://xerces.apache.org/xerces-j/features.html#external-general-entities
    // Xerces 2 - http://xerces.apache.org/xerces2-j/features.html#external-general-entities
    // JDK7+ - http://xml.org/sax/features/external-general-entities
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);

    // Xerces 1 - http://xerces.apache.org/xerces-j/features.html#external-parameter-entities
    // Xerces 2 - http://xerces.apache.org/xerces2-j/features.html#external-parameter-entities
    // JDK7+ - http://xml.org/sax/features/external-parameter-entities
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

    // Disable external DTDs as well.
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    // Also recommended by Timothy Morgan's 2014 paper "XML Schema, DTD, and
    // Entity Attacks". Per that paper: if inline DOCTYPEs ever have to be
    // supported, keep the entity settings above disabled and beware that SSRF
    // attacks (http://cwe.mitre.org/data/definitions/918.html) and denial of
    // service attacks (such as billion laughs or decompression bombs via
    // "jar:") remain a risk.
    factory.setXIncludeAware(false);
    factory.setExpandEntityReferences(false);

    // General parsing behaviour.
    factory.setNamespaceAware(true);
    factory.setValidating(false);
    factory.setIgnoringComments(false);
    factory.setIgnoringElementContentWhitespace(false);
    factory.setCoalescing(false);

    return factory.newDocumentBuilder();
}

From source file:org.dspace.content.packager.RoleIngester.java

/**
 * Ingest roles from an InputStream./*from  w  ww  . j  a va  2s . c  o  m*/
 *
 * @param context
 *          DSpace Context
 * @param parent
 *          the Parent DSpaceObject
 * @param stream
 *          the XML Document InputStream
 * @throws PackageException
 * @throws SQLException
 * @throws AuthorizeException
 */
public static void ingestStream(Context context, DSpaceObject parent, PackageParameters params,
        InputStream stream) throws PackageException, SQLException, AuthorizeException {
    Document document;

    try {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setIgnoringComments(true);
        dbf.setCoalescing(true);
        DocumentBuilder db = dbf.newDocumentBuilder();
        document = db.parse(stream);
    } catch (ParserConfigurationException e) {
        throw new PackageException(e);
    } catch (SAXException e) {
        throw new PackageException(e);
    } catch (IOException e) {
        throw new PackageException(e);
    }
    /*
     * TODO ? finally { close(stream); }
     */
    ingestDocument(context, parent, params, document);
}

From source file:org.dspace.content.packager.RoleIngester.java

/**
 * Ingests roles from a package file.
 *
 * <p>The file is parsed into a DOM document (comments ignored, CDATA
 * sections coalesced) which is then handed to {@code ingestDocument}.
 * The {@code license} argument is not used by this method.</p>
 *
 * @return always {@code null}; no DSpaceObject is created
 * @throws PackageException if the parser cannot be configured or the file is not parseable
 * @throws IOException if reading the file fails (not wrapped)
 */
@Override
public DSpaceObject ingest(Context context, DSpaceObject parent, File pkgFile, PackageParameters params,
        String license)
        throws PackageException, CrosswalkException, AuthorizeException, SQLException, IOException {
    final Document rolesDocument;

    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringComments(true);
        factory.setCoalescing(true);
        rolesDocument = factory.newDocumentBuilder().parse(pkgFile);
    } catch (ParserConfigurationException | SAXException e) {
        // IOException is deliberately not caught here: it is part of the signature.
        throw new PackageException(e);
    }
    ingestDocument(context, parent, params, rolesDocument);

    /* Does not create a DSpaceObject */
    return null;
}

From source file:org.gluu.oxtrust.ldap.service.Shibboleth2ConfService.java

/**
 * @param stream/*from  www.ja v a 2  s  .c  om*/
 * @throws IOException
 * @throws SAXException
 * @throws ParserConfigurationException
 */
public synchronized GluuErrorHandler validateMetadata(InputStream stream)
        throws ParserConfigurationException, SAXException, IOException {
    DocumentBuilderFactory newFactory = DocumentBuilderFactory.newInstance();
    newFactory.setCoalescing(false);
    newFactory.setExpandEntityReferences(true);
    newFactory.setIgnoringComments(false);

    newFactory.setIgnoringElementContentWhitespace(false);
    newFactory.setNamespaceAware(true);
    newFactory.setValidating(false);
    DocumentBuilder xmlParser = newFactory.newDocumentBuilder();
    Document xmlDoc = xmlParser.parse(stream);
    String schemaDir = System.getProperty("catalina.home") + File.separator + "conf" + File.separator
            + "shibboleth2" + File.separator + "idp" + File.separator + "schema" + File.separator;
    Schema schema = SchemaBuilder.buildSchema(SchemaLanguage.XML, schemaDir);
    Validator validator = schema.newValidator();
    GluuErrorHandler handler = new GluuErrorHandler();
    validator.setErrorHandler(handler);
    validator.validate(new DOMSource(xmlDoc));

    return handler;

}

From source file:org.kuali.rice.edl.impl.components.NoteConfigComponent.java

/**
 * Creates a {@link DocumentBuilder} from a fresh factory whose only
 * non-default setting is the coalescing flag.
 *
 * @param coalesce value passed to {@link DocumentBuilderFactory#setCoalescing(boolean)}
 * @return a new document builder
 * @throws Exception if the builder cannot be created
 */
protected DocumentBuilder getDocumentBuilder(boolean coalesce) throws Exception {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setCoalescing(coalesce);
    final DocumentBuilder builder = factory.newDocumentBuilder();
    return builder;
}