Usage examples for javax.xml.parsers.DocumentBuilderFactory.setIgnoringElementContentWhitespace
public void setIgnoringElementContentWhitespace(boolean whitespace)
From source file:AndroidUninstallStock.java
public static DocumentBuilderFactory getXmlDocFactory() throws SAXException { DocumentBuilderFactory xmlfactory = DocumentBuilderFactory.newInstance(); xmlfactory.setIgnoringComments(true); xmlfactory.setCoalescing(true);//from w w w . j a va2s . co m // http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4867706 xmlfactory.setIgnoringElementContentWhitespace(true); xmlfactory.setSchema(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI) .newSchema(AndroidUninstallStock.class.getResource("AndroidListSoft.xsd"))); xmlfactory.setValidating(false); // not DTD return xmlfactory; }
From source file:com.rapidminer.gui.OperatorDocLoader.java
/** * //ww w . j a va 2 s . c om * @param operatorWikiName * @param opDesc * @return The parsed <tt>Document</tt> (not finally parsed) of the selected operator. * @throws MalformedURLException * @throws ParserConfigurationException */ private static Document parseDocumentForOperator(String operatorWikiName, OperatorDescription opDesc) throws MalformedURLException, ParserConfigurationException { DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); builderFactory.setIgnoringComments(true); builderFactory.setIgnoringElementContentWhitespace(true); DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder(); documentBuilder.setEntityResolver(new XHTMLEntityResolver()); Document document = null; URL url = new URL(WIKI_PREFIX_FOR_OPERATORS + operatorWikiName); if (url != null) { try { document = documentBuilder.parse(WebServiceTools.openStreamFromURL(url)); } catch (IOException e) { logger.warning("Could not open " + url.toExternalForm() + ": " + e.getMessage()); } catch (SAXException e) { logger.warning("Could not parse operator documentation: " + e.getMessage()); } int i = 0; if (document != null) { Element contentElement = document.getElementById("content"); // removing content element from document if (contentElement != null) { contentElement.getParentNode().removeChild(contentElement); } // removing everything from body NodeList bodies = document.getElementsByTagName("body"); for (int k = 0; k < bodies.getLength(); k++) { Node body = bodies.item(k); while (body.hasChildNodes()) { body.removeChild(body.getFirstChild()); } // read content element to body if (contentElement != null && k == 0) { body.appendChild(contentElement); } } // removing everything from head NodeList heads = document.getElementsByTagName("head"); for (int k = 0; k < heads.getLength(); k++) { Node head = heads.item(k); while (head.hasChildNodes()) { head.removeChild(head.getFirstChild()); } } // removing...<head/> from document if (heads != null) { while (i < 
heads.getLength()) { Node head = heads.item(i); head.getParentNode().removeChild(head); } } // removing jump-to-nav element from document Element jumpToNavElement = document.getElementById("jump-to-nav"); if (jumpToNavElement != null) { jumpToNavElement.getParentNode().removeChild(jumpToNavElement); } // removing mw-normal-catlinks element from document Element mwNormalCatlinksElement = document.getElementById("mw-normal-catlinks"); if (mwNormalCatlinksElement != null) { mwNormalCatlinksElement.getParentNode().removeChild(mwNormalCatlinksElement); } // removing complete link navigation Element tocElement = document.getElementById("toc"); if (tocElement != null) { tocElement.getParentNode().removeChild(tocElement); } // removing everything from class printfooter NodeList nodeListDiv = document.getElementsByTagName("div"); for (int k = 0; k < nodeListDiv.getLength(); k++) { Element div = (Element) nodeListDiv.item(k); if (div.getAttribute("class").equals("printfooter")) { div.getParentNode().removeChild(div); } } // removing everything from class editsection NodeList spanList = document.getElementsByTagName("span"); for (int k = 0; k < spanList.getLength(); k++) { Element span = (Element) spanList.item(k); if (span.getAttribute("class").equals("editsection")) { span.getParentNode().removeChild(span); } } // Synopsis Header boolean doIt = true; NodeList pList = document.getElementsByTagName("p"); for (int k = 0; k < pList.getLength(); k++) { if (doIt) { Node p = pList.item(k); NodeList pChildList = p.getChildNodes(); for (int j = 0; j < pChildList.getLength(); j++) { Node pChild = pChildList.item(j); if (pChild.getNodeType() == Node.TEXT_NODE && pChild.getNodeValue() != null && StringUtils.isNotBlank(pChild.getNodeValue()) && StringUtils.isNotEmpty(pChild.getNodeValue())) { String pChildString = pChild.getNodeValue(); Element newPWithoutSpaces = document.createElement("p"); newPWithoutSpaces.setTextContent(pChildString); Node synopsis = 
document.createTextNode("Synopsis"); Element span = document.createElement("span"); span.setAttribute("class", "mw-headline"); span.setAttribute("id", "Synopsis"); span.appendChild(synopsis); Element h2 = document.createElement("h2"); h2.appendChild(span); Element div = document.createElement("div"); div.setAttribute("id", "synopsis"); div.appendChild(h2); div.appendChild(newPWithoutSpaces); Node pChildParentParent = pChild.getParentNode().getParentNode(); Node pChildParent = pChild.getParentNode(); pChildParentParent.replaceChild(div, pChildParent); doIt = false; break; } } } else { break; } } // removing all <br...>-Tags NodeList brList = document.getElementsByTagName("br"); while (i < brList.getLength()) { Node br = brList.item(i); Node parentBrNode = br.getParentNode(); parentBrNode.removeChild(br); } // removing everything from script NodeList scriptList = document.getElementsByTagName("script"); while (i < scriptList.getLength()) { Node scriptNode = scriptList.item(i); Node parentNode = scriptNode.getParentNode(); parentNode.removeChild(scriptNode); } // removing all empty <p...>-Tags NodeList pList2 = document.getElementsByTagName("p"); int ccc = 0; while (ccc < pList2.getLength()) { Node p = pList2.item(ccc); NodeList pChilds = p.getChildNodes(); int kk = 0; while (kk < pChilds.getLength()) { Node pChild = pChilds.item(kk); if (pChild.getNodeType() == Node.TEXT_NODE) { String pNodeValue = pChild.getNodeValue(); if (pNodeValue == null || StringUtils.isBlank(pNodeValue) || StringUtils.isEmpty(pNodeValue)) { kk++; } else { ccc++; break; } } else { ccc++; break; } if (kk == pChilds.getLength()) { Node parentBrNode = p.getParentNode(); parentBrNode.removeChild(p); } } } // removing firstHeading element from document Element firstHeadingElement = document.getElementById("firstHeading"); if (firstHeadingElement != null) { CURRENT_OPERATOR_NAME_READ_FROM_RAPIDWIKI = firstHeadingElement.getFirstChild().getNodeValue() .replaceFirst(".*:", ""); 
firstHeadingElement.getParentNode().removeChild(firstHeadingElement); } // setting operator plugin name if (opDesc != null && opDesc.getProvider() != null) { CURRENT_OPERATOR_PLUGIN_NAME = opDesc.getProvider().getName(); } // removing sitesub element from document Element siteSubElement = document.getElementById("siteSub"); if (siteSubElement != null) { siteSubElement.getParentNode().removeChild(siteSubElement); } // removing contentSub element from document Element contentSubElement = document.getElementById("contentSub"); if (contentSubElement != null) { contentSubElement.getParentNode().removeChild(contentSubElement); } // removing catlinks element from document Element catlinksElement = document.getElementById("catlinks"); if (catlinksElement != null) { catlinksElement.getParentNode().removeChild(catlinksElement); } // removing <a...> element from document, if they are empty NodeList aList = document.getElementsByTagName("a"); if (aList != null) { int k = 0; while (k < aList.getLength()) { Node a = aList.item(k); Element aElement = (Element) a; if (aElement.getAttribute("class").equals("internal")) { a.getParentNode().removeChild(a); } else { Node aChild = a.getFirstChild(); if (aChild != null && (aChild.getNodeValue() != null && aChild.getNodeType() == Node.TEXT_NODE && StringUtils.isNotBlank(aChild.getNodeValue()) && StringUtils.isNotEmpty(aChild.getNodeValue()) || aChild.getNodeName() != null)) { Element aChildElement = null; if (aChild.getNodeName().startsWith("img")) { aChildElement = (Element) aChild; Element imgElement = document.createElement("img"); imgElement.setAttribute("alt", aChildElement.getAttribute("alt")); imgElement.setAttribute("class", aChildElement.getAttribute("class")); imgElement.setAttribute("height", aChildElement.getAttribute("height")); imgElement.setAttribute("src", WIKI_PREFIX_FOR_IMAGES + aChildElement.getAttribute("src")); imgElement.setAttribute("width", aChildElement.getAttribute("width")); imgElement.setAttribute("border", 
"1"); Node aParent = a.getParentNode(); aParent.replaceChild(imgElement, a); } else { k++; } } else { a.getParentNode().removeChild(a); } } } } } } return document; }
From source file:com.bstek.dorado.core.xml.XercesXmlDocumentBuilder.java
/**
 * Creates a new DOM parser from a freshly configured factory.
 *
 * <p>The factory is told to ignore comments and element-content whitespace.
 *
 * @return a new {@link DocumentBuilder}
 * @throws ParserConfigurationException if the factory cannot create a parser
 */
protected DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
    final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setIgnoringComments(true);
    builderFactory.setIgnoringElementContentWhitespace(true);
    final DocumentBuilder builder = builderFactory.newDocumentBuilder();
    return builder;
}
From source file:com.wudaosoft.net.httpclient.XmlResponseHandler.java
@Override public XmlObject handleResponse(HttpResponse response) throws ClientProtocolException, IOException { int status = response.getStatusLine().getStatusCode(); if (status < 200 || status >= 300) { throw new ClientProtocolException("Unexpected response status: " + status); }//from w w w.j av a 2s . c o m HttpEntity entity = response.getEntity(); if (entity == null) { throw new ClientProtocolException("Response contains no content"); } DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); dbfac.setIgnoringElementContentWhitespace(true); dbfac.setCoalescing(true); dbfac.setIgnoringComments(true); try { DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); ContentType contentType = ContentType.getOrDefault(entity); // if (!contentType.equals(ContentType.APPLICATION_XML)) { // throw new ClientProtocolException("Unexpected content type:" + // contentType); // } Charset charset = contentType.getCharset(); if (charset == null) { charset = Consts.UTF_8; } return XmlObject.fromDocument(docBuilder.parse(entity.getContent(), charset.name())); } catch (ParserConfigurationException ex) { throw new IllegalStateException(ex); } catch (SAXException ex) { throw new ClientProtocolException("Malformed XML document", ex); } }
From source file:br.com.insula.spring.security.janrain.JanrainService.java
/**
 * Reads the given stream into a DOM document.
 *
 * <p>The parser is created from a new factory that is asked to ignore element-content
 * whitespace. The stream is not closed by this method itself.
 *
 * @param content the XML to parse
 * @return the parsed document
 * @throws ParserConfigurationException if no parser can be created
 * @throws SAXException if the content is not well-formed XML
 * @throws IOException if the stream cannot be read
 */
private Document parseContent(InputStream content)
        throws ParserConfigurationException, SAXException, IOException {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setIgnoringElementContentWhitespace(true);
    return factory.newDocumentBuilder().parse(content);
}
From source file:DOMImport.java
public void inandout(String infile1, String infile2, String outfile) { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(true);//from ww w . j a v a 2 s. c om dbf.setNamespaceAware(true); dbf.setIgnoringElementContentWhitespace(true); Document doc1 = null; Document doc2 = null; try { DocumentBuilder builder = dbf.newDocumentBuilder(); builder.setErrorHandler(new MyErrorHandler()); InputSource is1 = new InputSource(infile1); doc1 = builder.parse(is1); InputSource is2 = new InputSource(infile2); doc2 = builder.parse(is2); importName(doc1, doc2); FileOutputStream fos = new FileOutputStream(outfile); TreeToXML ttxml = new TreeToXML(); ttxml.write(fos, doc2); fos.close(); } catch (SAXException e) { System.exit(1); } catch (ParserConfigurationException e) { System.err.println(e); System.exit(1); } catch (IOException e) { System.err.println(e); System.exit(1); } }
From source file:com.francetelecom.clara.cloud.db.liquibase.CompareChangeLogWithHibernateAutoCreateIT.java
private boolean searchForDifferenceInXml(File xmlFile) { boolean differenceFound = false; DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false);//from w w w . j ava2s. c o m factory.setIgnoringElementContentWhitespace(true); try { DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(xmlFile); Element databaseChangeLogRoot = document.getDocumentElement(); differenceFound = databaseChangeLogRoot.hasChildNodes(); // Do something with the document here. } catch (ParserConfigurationException | IOException | SAXException e) { LOGGER.info("Failed to parse xml file: {}", xmlFile, e); } return differenceFound; }
From source file:nl.surfnet.sab.SabResponseParser.java
private Document createDocument(InputStream documentStream) throws ParserConfigurationException, IOException, SAXException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true);//www . j a v a2s . c om factory.setIgnoringElementContentWhitespace(true); factory.setValidating(false); DocumentBuilder builder = factory.newDocumentBuilder(); return builder.parse(documentStream); }
From source file:edu.ucmerced.cas.services.CasShibServiceRegistrar.java
protected synchronized void initialize() throws CasShibServiceRegistrarException { try {//from w w w . j a v a 2 s . co m DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringElementContentWhitespace(true); factory.setNamespaceAware(true); DocumentBuilder builder = factory.newDocumentBuilder(); InputStream is = getClass().getClassLoader() .getResourceAsStream(casShibServiceRegistrationsResourceName); if (is == null) { throw new CasShibServiceRegistrarException( "Couldn't find " + casShibServiceRegistrationsResourceName + " in the classpath"); } try { Document regDoc = builder.parse(is); addEntries(regDoc.getDocumentElement()); } finally { is.close(); } } catch (ParserConfigurationException e) { throw new CasShibServiceRegistrarException(e); } catch (SAXException e) { throw new CasShibServiceRegistrarException(e); } catch (IOException e) { throw new CasShibServiceRegistrarException(e); } this.isInitialized = true; }
From source file:com.persistent.cloudninja.service.impl.RunningInstancesJSONDataServiceImpl.java
/**
 * Parses the deployment monitoring response and extracts role name, instance name and instance
 * status of every {@code /Deployment/RoleInstanceList/RoleInstance} element.
 *
 * <p>The three values are read relative to each {@code RoleInstance}, so an instance that lacks
 * one of the child elements yields an empty string for that value instead of shifting the
 * values of the following instances.
 *
 * @param response the XML response of the deployment monitoring task
 * @return one entity per {@code RoleInstance}, in document order
 * @throws ParserConfigurationException if no parser can be created
 * @throws SAXException if the response is not well-formed XML
 * @throws IOException if the response cannot be read
 * @throws XPathExpressionException if an XPath expression cannot be evaluated
 */
private List<InstanceHealthRoleInstanceEntity> parseResponse(StringBuffer response)
        throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
    DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    documentBuilderFactory.setIgnoringElementContentWhitespace(true);
    DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
    Document document = documentBuilder.parse(new InputSource(new StringReader(response.toString())));

    XPath xPath = XPathFactory.newInstance().newXPath();
    NodeList roleInstances = (NodeList) xPath.evaluate("/Deployment/RoleInstanceList/RoleInstance",
            document, XPathConstants.NODESET);

    List<InstanceHealthRoleInstanceEntity> list =
            new ArrayList<InstanceHealthRoleInstanceEntity>(roleInstances.getLength());
    for (int index = 0; index < roleInstances.getLength(); index++) {
        Element roleInstance = (Element) roleInstances.item(index);

        // The string value of a missing child is "", so incomplete instances cannot cause a
        // NullPointerException or pair up values that belong to different instances.
        InstanceHealthRoleInstanceEntity roleInstanceEntity = new InstanceHealthRoleInstanceEntity();
        roleInstanceEntity.setRoleName(xPath.evaluate("RoleName", roleInstance));
        roleInstanceEntity.setInstanceName(xPath.evaluate("InstanceName", roleInstance));
        roleInstanceEntity.setInstanceStatus(xPath.evaluate("InstanceStatus", roleInstance));
        list.add(roleInstanceEntity);
    }
    return list;
}