Example usage for javax.xml.parsers DocumentBuilderFactory setIgnoringElementContentWhitespace

List of usage examples for javax.xml.parsers DocumentBuilderFactory setIgnoringElementContentWhitespace

Introduction

On this page you can find example usages of javax.xml.parsers.DocumentBuilderFactory.setIgnoringElementContentWhitespace.

Prototype


public void setIgnoringElementContentWhitespace(boolean whitespace) 

Source Link

Document

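Specifies that the parsers created by this factory must eliminate whitespace in element content (sometimes known loosely as 'ignorable whitespace') when parsing XML documents (see XML Rec 2.10). Because the parser needs the content model to decide which whitespace is ignorable, this setting only takes effect when the parser is in validating mode.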

Usage

From source file:AndroidUninstallStock.java

/**
 * Creates the factory used to parse the application's XML lists.
 * <p>
 * The factory drops comments, merges CDATA into adjacent text, ignores element-content
 * whitespace and checks documents against the bundled <tt>AndroidListSoft.xsd</tt> schema.
 * DTD validation stays switched off.
 *
 * @return the configured factory
 * @throws SAXException if the bundled schema cannot be compiled
 */
public static DocumentBuilderFactory getXmlDocFactory() throws SAXException {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setIgnoringComments(true);
    factory.setCoalescing(true);
    // http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4867706
    factory.setIgnoringElementContentWhitespace(true);

    final SchemaFactory xsdFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    factory.setSchema(xsdFactory.newSchema(AndroidUninstallStock.class.getResource("AndroidListSoft.xsd")));

    factory.setValidating(false); // not DTD
    return factory;
}

From source file:com.rapidminer.gui.OperatorDocLoader.java

/**
 * Downloads the wiki page of an operator and reduces it to the bare documentation content.
 * <p>
 * The page is parsed as XHTML and then cleaned up in place: only the element with id
 * <tt>content</tt> is kept inside the first body; head, navigation, table of contents, print
 * footer, edit links, line breaks, scripts and blank paragraphs are removed; the first
 * paragraph containing text is wrapped into a "Synopsis" section; anchors that wrap an image
 * are replaced by a plain <tt>img</tt> element whose source is prefixed with
 * <tt>WIKI_PREFIX_FOR_IMAGES</tt>.
 * <p>
 * Side effects: <tt>CURRENT_OPERATOR_NAME_READ_FROM_RAPIDWIKI</tt> is set from the page
 * heading (if it has a text value) and <tt>CURRENT_OPERATOR_PLUGIN_NAME</tt> from the provider
 * of <tt>opDesc</tt> (if available). Both are only touched when the page could be parsed.
 *
 * @param operatorWikiName
 *            page name that is appended to <tt>WIKI_PREFIX_FOR_OPERATORS</tt> to build the URL
 * @param opDesc
 *            description of the operator; may be <tt>null</tt>
 * @return The cleaned-up <tt>Document</tt> of the selected operator, or <tt>null</tt> if the
 *         page could not be opened or parsed (a warning is logged in that case).
 * @throws MalformedURLException
 *             if the resulting URL is malformed
 * @throws ParserConfigurationException
 *             if no XML parser can be created
 */
private static Document parseDocumentForOperator(String operatorWikiName, OperatorDescription opDesc)
        throws MalformedURLException, ParserConfigurationException {
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setIgnoringComments(true);
    builderFactory.setIgnoringElementContentWhitespace(true);
    DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder();
    documentBuilder.setEntityResolver(new XHTMLEntityResolver());

    URL url = new URL(WIKI_PREFIX_FOR_OPERATORS + operatorWikiName);
    Document document = null;
    try {
        // NOTE(review): the opened stream is never closed here - confirm whether the helper
        // or the parser takes care of that.
        document = documentBuilder.parse(WebServiceTools.openStreamFromURL(url));
    } catch (IOException e) {
        logger.warning("Could not open " + url.toExternalForm() + ": " + e.getMessage());
    } catch (SAXException e) {
        logger.warning("Could not parse operator documentation: " + e.getMessage());
    }
    if (document == null) {
        return null;
    }

    // detach the content element, empty every body, then re-attach the content to the first body
    Element contentElement = document.getElementById("content");
    if (contentElement != null) {
        contentElement.getParentNode().removeChild(contentElement);
    }
    NodeList bodies = document.getElementsByTagName("body");
    for (int k = 0; k < bodies.getLength(); k++) {
        Node body = bodies.item(k);
        while (body.hasChildNodes()) {
            body.removeChild(body.getFirstChild());
        }
        if (contentElement != null && k == 0) {
            body.appendChild(contentElement);
        }
    }

    // strip page chrome
    removeAllElementsWithTag(document, "head");
    removeElementWithId(document, "jump-to-nav");
    removeElementWithId(document, "mw-normal-catlinks");
    removeElementWithId(document, "toc");
    removeElementsWithTagAndClass(document, "div", "printfooter");
    removeElementsWithTagAndClass(document, "span", "editsection");

    wrapFirstTextParagraphAsSynopsis(document);

    removeAllElementsWithTag(document, "br");
    removeAllElementsWithTag(document, "script");
    // must run after the <br> removal: paragraphs that only held line breaks are empty now
    removeBlankParagraphs(document);

    // the heading carries the operator name; remember it before dropping the element
    Element firstHeadingElement = document.getElementById("firstHeading");
    if (firstHeadingElement != null) {
        Node headingText = firstHeadingElement.getFirstChild();
        if (headingText != null && headingText.getNodeValue() != null) {
            CURRENT_OPERATOR_NAME_READ_FROM_RAPIDWIKI = headingText.getNodeValue().replaceFirst(".*:", "");
        }
        firstHeadingElement.getParentNode().removeChild(firstHeadingElement);
    }

    // setting operator plugin name
    if (opDesc != null && opDesc.getProvider() != null) {
        CURRENT_OPERATOR_PLUGIN_NAME = opDesc.getProvider().getName();
    }

    removeElementWithId(document, "siteSub");
    removeElementWithId(document, "contentSub");
    removeElementWithId(document, "catlinks");

    cleanUpAnchors(document);

    return document;
}

/** Removes the element with the given id from the document, if there is one. */
private static void removeElementWithId(Document document, String id) {
    Element element = document.getElementById(id);
    if (element != null && element.getParentNode() != null) {
        element.getParentNode().removeChild(element);
    }
}

/** Removes every element with the given tag name (including its subtree) from the document. */
private static void removeAllElementsWithTag(Document document, String tagName) {
    NodeList elements = document.getElementsByTagName(tagName);
    // the list is live: removing the first entry moves the next one to index 0
    while (elements.getLength() > 0) {
        Node element = elements.item(0);
        element.getParentNode().removeChild(element);
    }
}

/** Removes every element with the given tag name whose class attribute equals the given class. */
private static void removeElementsWithTagAndClass(Document document, String tagName, String className) {
    NodeList elements = document.getElementsByTagName(tagName);
    int index = 0;
    while (index < elements.getLength()) {
        Element element = (Element) elements.item(index);
        if (element.getAttribute("class").equals(className)) {
            // live list: the successor slides into this index, so do not advance
            element.getParentNode().removeChild(element);
        } else {
            index++;
        }
    }
}

/** Replaces the first paragraph that has a non-blank text child by a "Synopsis" section holding that text. */
private static void wrapFirstTextParagraphAsSynopsis(Document document) {
    NodeList paragraphs = document.getElementsByTagName("p");
    for (int k = 0; k < paragraphs.getLength(); k++) {
        Node paragraph = paragraphs.item(k);
        NodeList children = paragraph.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            if (child.getNodeType() == Node.TEXT_NODE && child.getNodeValue() != null
                    && StringUtils.isNotBlank(child.getNodeValue())) {
                Element synopsisText = document.createElement("p");
                synopsisText.setTextContent(child.getNodeValue());

                Element headline = document.createElement("span");
                headline.setAttribute("class", "mw-headline");
                headline.setAttribute("id", "Synopsis");
                headline.appendChild(document.createTextNode("Synopsis"));

                Element h2 = document.createElement("h2");
                h2.appendChild(headline);

                Element synopsis = document.createElement("div");
                synopsis.setAttribute("id", "synopsis");
                synopsis.appendChild(h2);
                synopsis.appendChild(synopsisText);

                paragraph.getParentNode().replaceChild(synopsis, paragraph);
                return; // only the first matching paragraph becomes the synopsis
            }
        }
    }
}

/** Removes every paragraph that has no children or only blank text children. */
private static void removeBlankParagraphs(Document document) {
    NodeList paragraphs = document.getElementsByTagName("p");
    int index = 0;
    while (index < paragraphs.getLength()) {
        Node paragraph = paragraphs.item(index);
        if (hasOnlyBlankText(paragraph)) {
            // live list: the successor slides into this index, so do not advance
            paragraph.getParentNode().removeChild(paragraph);
        } else {
            index++;
        }
    }
}

/** Tells whether the node has no children other than blank (or value-less) text nodes. */
private static boolean hasOnlyBlankText(Node node) {
    NodeList children = node.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        if (child.getNodeType() != Node.TEXT_NODE) {
            return false;
        }
        String value = child.getNodeValue();
        if (value != null && !StringUtils.isBlank(value)) {
            return false;
        }
    }
    return true;
}

/** Removes internal and childless anchors and replaces anchors that start with an image by that image. */
private static void cleanUpAnchors(Document document) {
    NodeList anchors = document.getElementsByTagName("a");
    int index = 0;
    while (index < anchors.getLength()) {
        Element anchor = (Element) anchors.item(index);
        Node firstChild = anchor.getFirstChild();
        if (anchor.getAttribute("class").equals("internal") || firstChild == null) {
            anchor.getParentNode().removeChild(anchor);
        } else if (firstChild.getNodeName().startsWith("img")) {
            Element linkedImage = (Element) firstChild;

            Element imgElement = document.createElement("img");
            imgElement.setAttribute("alt", linkedImage.getAttribute("alt"));
            imgElement.setAttribute("class", linkedImage.getAttribute("class"));
            imgElement.setAttribute("height", linkedImage.getAttribute("height"));
            imgElement.setAttribute("src", WIKI_PREFIX_FOR_IMAGES + linkedImage.getAttribute("src"));
            imgElement.setAttribute("width", linkedImage.getAttribute("width"));
            imgElement.setAttribute("border", "1");

            anchor.getParentNode().replaceChild(imgElement, anchor);
        } else {
            // any other anchor with content is kept (this includes anchors whose first child
            // is blank text, exactly as before)
            index++;
        }
    }
}

From source file:com.bstek.dorado.core.xml.XercesXmlDocumentBuilder.java

/**
 * Creates a fresh DOM parser that skips comments and ignorable element-content whitespace.
 *
 * @return a new, non-shared document builder
 * @throws ParserConfigurationException if the platform cannot provide such a parser
 */
protected DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
    final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setIgnoringComments(true);
    builderFactory.setIgnoringElementContentWhitespace(true);
    return builderFactory.newDocumentBuilder();
}

From source file:com.wudaosoft.net.httpclient.XmlResponseHandler.java

/**
 * Converts a successful HTTP response into an {@code XmlObject} by parsing its body as XML.
 * <p>
 * The Content-Type is not required to be XML (that check is disabled). If the Content-Type
 * header declares a charset, it is handed to the parser as the encoding of the byte stream;
 * otherwise the parser detects the encoding from the document itself.
 *
 * @param response the response to convert
 * @return the parsed body
 * @throws ClientProtocolException if the status is not 2xx, the response has no body, or the
 *         body is not well-formed XML
 * @throws IOException if reading the body fails
 */
@Override
public XmlObject handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
    int status = response.getStatusLine().getStatusCode();

    if (status < 200 || status >= 300) {
        throw new ClientProtocolException("Unexpected response status: " + status);
    }

    HttpEntity entity = response.getEntity();

    if (entity == null) {
        throw new ClientProtocolException("Response contains no content");
    }

    // NOTE(review): the body comes from a remote server and the parser keeps its default
    // DTD/external-entity settings - consider hardening against XXE.
    DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
    dbfac.setIgnoringElementContentWhitespace(true);
    dbfac.setCoalescing(true);
    dbfac.setIgnoringComments(true);
    try {
        DocumentBuilder docBuilder = dbfac.newDocumentBuilder();

        // The second argument of DocumentBuilder.parse(InputStream, String) is a system id,
        // not a charset, so the declared charset has to travel via an InputSource instead.
        org.xml.sax.InputSource source = new org.xml.sax.InputSource(entity.getContent());
        Charset charset = ContentType.getOrDefault(entity).getCharset();
        if (charset != null) {
            source.setEncoding(charset.name());
        }
        return XmlObject.fromDocument(docBuilder.parse(source));
    } catch (ParserConfigurationException ex) {
        throw new IllegalStateException(ex);
    } catch (SAXException ex) {
        throw new ClientProtocolException("Malformed XML document", ex);
    }
}

From source file:br.com.insula.spring.security.janrain.JanrainService.java

/**
 * Parses the given stream into a DOM document, ignoring element-content whitespace.
 *
 * @param content the XML bytes to parse
 * @return the parsed document
 * @throws ParserConfigurationException if no parser can be created
 * @throws SAXException if the content is not well-formed XML
 * @throws IOException if the stream cannot be read
 */
private Document parseContent(InputStream content)
        throws ParserConfigurationException, SAXException, IOException {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setIgnoringElementContentWhitespace(true);
    return factory.newDocumentBuilder().parse(content);
}

From source file:DOMImport.java

/**
 * Parses two XML inputs with a validating, namespace-aware parser, passes both documents to
 * {@code importName} and writes the second document to the output file.
 * <p>
 * Any parse, configuration or I/O failure is reported on standard error and terminates the
 * JVM with exit status 1.
 *
 * @param infile1 system identifier (URI) of the first input document
 * @param infile2 system identifier (URI) of the second input document, which is also the one
 *        that gets written out
 * @param outfile path of the file to write
 */
public void inandout(String infile1, String infile2, String outfile) {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(true);
    dbf.setNamespaceAware(true);
    dbf.setIgnoringElementContentWhitespace(true);

    try {
        DocumentBuilder builder = dbf.newDocumentBuilder();
        builder.setErrorHandler(new MyErrorHandler());
        Document doc1 = builder.parse(new InputSource(infile1));
        Document doc2 = builder.parse(new InputSource(infile2));
        importName(doc1, doc2);

        FileOutputStream fos = new FileOutputStream(outfile);
        try {
            TreeToXML ttxml = new TreeToXML();
            ttxml.write(fos, doc2);
        } finally {
            // release the file handle even when writing fails
            fos.close();
        }
    } catch (SAXException e) {
        // report the parse failure like the other failures instead of exiting silently
        System.err.println(e);
        System.exit(1);
    } catch (ParserConfigurationException e) {
        System.err.println(e);
        System.exit(1);
    } catch (IOException e) {
        System.err.println(e);
        System.exit(1);
    }
}

From source file:com.francetelecom.clara.cloud.db.liquibase.CompareChangeLogWithHibernateAutoCreateIT.java

/**
 * Checks whether the root element of the given XML file has any child nodes.
 * <p>
 * A file that cannot be parsed is logged at info level and reported as "no difference".
 *
 * @param xmlFile the XML file to inspect
 * @return {@code true} if the root element has child nodes, {@code false} if it has none or
 *         the file could not be parsed
 */
private boolean searchForDifferenceInXml(File xmlFile) {
    DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
    parserFactory.setValidating(false);
    parserFactory.setIgnoringElementContentWhitespace(true);
    try {
        Element changeLogRoot = parserFactory.newDocumentBuilder().parse(xmlFile).getDocumentElement();
        return changeLogRoot.hasChildNodes();
    } catch (ParserConfigurationException | IOException | SAXException e) {
        LOGGER.info("Failed to parse xml file: {}", xmlFile, e);
        return false;
    }
}

From source file:nl.surfnet.sab.SabResponseParser.java

/**
 * Reads the given stream into a DOM document using a namespace-aware, non-validating parser
 * that ignores element-content whitespace.
 *
 * @param documentStream the XML bytes to parse
 * @return the parsed document
 * @throws ParserConfigurationException if no parser can be created
 * @throws IOException if the stream cannot be read
 * @throws SAXException if the content is not well-formed XML
 */
private Document createDocument(InputStream documentStream)
        throws ParserConfigurationException, IOException, SAXException {
    final DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
    parserFactory.setNamespaceAware(true);
    parserFactory.setIgnoringElementContentWhitespace(true);
    parserFactory.setValidating(false);

    return parserFactory.newDocumentBuilder().parse(documentStream);
}

From source file:edu.ucmerced.cas.services.CasShibServiceRegistrar.java

/**
 * Loads the service registrations from the classpath resource named by
 * {@code casShibServiceRegistrationsResourceName} and marks this registrar as initialized.
 * <p>
 * The resource is parsed with a namespace-aware parser and its root element is handed to
 * {@code addEntries}. The stream is always closed once parsing was attempted.
 *
 * @throws CasShibServiceRegistrarException if the resource is missing from the classpath or
 *         cannot be read or parsed
 */
protected synchronized void initialize() throws CasShibServiceRegistrarException {
    try {
        final DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setIgnoringElementContentWhitespace(true);
        parserFactory.setNamespaceAware(true);
        final DocumentBuilder parser = parserFactory.newDocumentBuilder();

        final InputStream registrations = getClass().getClassLoader()
                .getResourceAsStream(casShibServiceRegistrationsResourceName);
        if (registrations == null) {
            throw new CasShibServiceRegistrarException(
                    "Couldn't find " + casShibServiceRegistrationsResourceName + " in the classpath");
        }

        try {
            addEntries(parser.parse(registrations).getDocumentElement());
        } finally {
            registrations.close();
        }
    } catch (ParserConfigurationException e) {
        throw new CasShibServiceRegistrarException(e);
    } catch (SAXException e) {
        throw new CasShibServiceRegistrarException(e);
    } catch (IOException e) {
        throw new CasShibServiceRegistrarException(e);
    }

    // only reached when every step above succeeded
    this.isInitialized = true;
}

From source file:com.persistent.cloudninja.service.impl.RunningInstancesJSONDataServiceImpl.java

/**
 * Parse the response from deployment monitoring and get role name, instance name and instance status.
 * <p>
 * One entity is produced per <tt>/Deployment/RoleInstanceList/RoleInstance</tt> element. The
 * three values are read relative to that element, so they always belong to the same role
 * instance; a value whose child element is missing is set to the empty string.
 *
 * @param response The XML response of deployment monitoring task.
 * @return List of InstanceHealthRoleInstanceEntity, in document order; empty if the response
 *         contains no role instances
 * @throws ParserConfigurationException if no XML parser can be created
 * @throws SAXException if the response is not well-formed XML
 * @throws IOException if the response cannot be read
 * @throws XPathExpressionException if an XPath expression cannot be evaluated
 */
private List<InstanceHealthRoleInstanceEntity> parseResponse(StringBuffer response)
        throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
    DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    documentBuilderFactory.setIgnoringElementContentWhitespace(true);
    DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
    Document document = documentBuilder.parse(new InputSource(new StringReader(response.toString())));

    XPath xPath = XPathFactory.newInstance().newXPath();
    NodeList roleInstances = (NodeList) xPath.evaluate("/Deployment/RoleInstanceList/RoleInstance",
            document, XPathConstants.NODESET);

    List<InstanceHealthRoleInstanceEntity> list = new ArrayList<InstanceHealthRoleInstanceEntity>(
            roleInstances.getLength());
    for (int index = 0; index < roleInstances.getLength(); index++) {
        Element roleInstance = (Element) roleInstances.item(index);

        // Evaluate relative to the current role instance instead of indexing three independent
        // node lists, which get out of step as soon as one instance lacks one of the children.
        InstanceHealthRoleInstanceEntity roleInstanceEntity = new InstanceHealthRoleInstanceEntity();
        roleInstanceEntity.setRoleName(xPath.evaluate("RoleName", roleInstance));
        roleInstanceEntity.setInstanceName(xPath.evaluate("InstanceName", roleInstance));
        roleInstanceEntity.setInstanceStatus(xPath.evaluate("InstanceStatus", roleInstance));
        list.add(roleInstanceEntity);
    }
    return list;
}