Usage examples for javax.xml.parsers.DocumentBuilderFactory.setIgnoringComments
public void setIgnoringComments(boolean ignoreComments)
From source file:AndroidUninstallStock.java
public static DocumentBuilderFactory getXmlDocFactory() throws SAXException { DocumentBuilderFactory xmlfactory = DocumentBuilderFactory.newInstance(); xmlfactory.setIgnoringComments(true); xmlfactory.setCoalescing(true);/* w w w . ja va 2 s . c o m*/ // http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4867706 xmlfactory.setIgnoringElementContentWhitespace(true); xmlfactory.setSchema(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI) .newSchema(AndroidUninstallStock.class.getResource("AndroidListSoft.xsd"))); xmlfactory.setValidating(false); // not DTD return xmlfactory; }
From source file:com.rapidminer.gui.OperatorDocLoader.java
/** * /*from w ww . j a va2 s . co m*/ * @param operatorWikiName * @param opDesc * @return The parsed <tt>Document</tt> (not finally parsed) of the selected operator. * @throws MalformedURLException * @throws ParserConfigurationException */ private static Document parseDocumentForOperator(String operatorWikiName, OperatorDescription opDesc) throws MalformedURLException, ParserConfigurationException { DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); builderFactory.setIgnoringComments(true); builderFactory.setIgnoringElementContentWhitespace(true); DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder(); documentBuilder.setEntityResolver(new XHTMLEntityResolver()); Document document = null; URL url = new URL(WIKI_PREFIX_FOR_OPERATORS + operatorWikiName); if (url != null) { try { document = documentBuilder.parse(WebServiceTools.openStreamFromURL(url)); } catch (IOException e) { logger.warning("Could not open " + url.toExternalForm() + ": " + e.getMessage()); } catch (SAXException e) { logger.warning("Could not parse operator documentation: " + e.getMessage()); } int i = 0; if (document != null) { Element contentElement = document.getElementById("content"); // removing content element from document if (contentElement != null) { contentElement.getParentNode().removeChild(contentElement); } // removing everything from body NodeList bodies = document.getElementsByTagName("body"); for (int k = 0; k < bodies.getLength(); k++) { Node body = bodies.item(k); while (body.hasChildNodes()) { body.removeChild(body.getFirstChild()); } // read content element to body if (contentElement != null && k == 0) { body.appendChild(contentElement); } } // removing everything from head NodeList heads = document.getElementsByTagName("head"); for (int k = 0; k < heads.getLength(); k++) { Node head = heads.item(k); while (head.hasChildNodes()) { head.removeChild(head.getFirstChild()); } } // removing...<head/> from document if (heads != null) { while (i 
< heads.getLength()) { Node head = heads.item(i); head.getParentNode().removeChild(head); } } // removing jump-to-nav element from document Element jumpToNavElement = document.getElementById("jump-to-nav"); if (jumpToNavElement != null) { jumpToNavElement.getParentNode().removeChild(jumpToNavElement); } // removing mw-normal-catlinks element from document Element mwNormalCatlinksElement = document.getElementById("mw-normal-catlinks"); if (mwNormalCatlinksElement != null) { mwNormalCatlinksElement.getParentNode().removeChild(mwNormalCatlinksElement); } // removing complete link navigation Element tocElement = document.getElementById("toc"); if (tocElement != null) { tocElement.getParentNode().removeChild(tocElement); } // removing everything from class printfooter NodeList nodeListDiv = document.getElementsByTagName("div"); for (int k = 0; k < nodeListDiv.getLength(); k++) { Element div = (Element) nodeListDiv.item(k); if (div.getAttribute("class").equals("printfooter")) { div.getParentNode().removeChild(div); } } // removing everything from class editsection NodeList spanList = document.getElementsByTagName("span"); for (int k = 0; k < spanList.getLength(); k++) { Element span = (Element) spanList.item(k); if (span.getAttribute("class").equals("editsection")) { span.getParentNode().removeChild(span); } } // Synopsis Header boolean doIt = true; NodeList pList = document.getElementsByTagName("p"); for (int k = 0; k < pList.getLength(); k++) { if (doIt) { Node p = pList.item(k); NodeList pChildList = p.getChildNodes(); for (int j = 0; j < pChildList.getLength(); j++) { Node pChild = pChildList.item(j); if (pChild.getNodeType() == Node.TEXT_NODE && pChild.getNodeValue() != null && StringUtils.isNotBlank(pChild.getNodeValue()) && StringUtils.isNotEmpty(pChild.getNodeValue())) { String pChildString = pChild.getNodeValue(); Element newPWithoutSpaces = document.createElement("p"); newPWithoutSpaces.setTextContent(pChildString); Node synopsis = 
document.createTextNode("Synopsis"); Element span = document.createElement("span"); span.setAttribute("class", "mw-headline"); span.setAttribute("id", "Synopsis"); span.appendChild(synopsis); Element h2 = document.createElement("h2"); h2.appendChild(span); Element div = document.createElement("div"); div.setAttribute("id", "synopsis"); div.appendChild(h2); div.appendChild(newPWithoutSpaces); Node pChildParentParent = pChild.getParentNode().getParentNode(); Node pChildParent = pChild.getParentNode(); pChildParentParent.replaceChild(div, pChildParent); doIt = false; break; } } } else { break; } } // removing all <br...>-Tags NodeList brList = document.getElementsByTagName("br"); while (i < brList.getLength()) { Node br = brList.item(i); Node parentBrNode = br.getParentNode(); parentBrNode.removeChild(br); } // removing everything from script NodeList scriptList = document.getElementsByTagName("script"); while (i < scriptList.getLength()) { Node scriptNode = scriptList.item(i); Node parentNode = scriptNode.getParentNode(); parentNode.removeChild(scriptNode); } // removing all empty <p...>-Tags NodeList pList2 = document.getElementsByTagName("p"); int ccc = 0; while (ccc < pList2.getLength()) { Node p = pList2.item(ccc); NodeList pChilds = p.getChildNodes(); int kk = 0; while (kk < pChilds.getLength()) { Node pChild = pChilds.item(kk); if (pChild.getNodeType() == Node.TEXT_NODE) { String pNodeValue = pChild.getNodeValue(); if (pNodeValue == null || StringUtils.isBlank(pNodeValue) || StringUtils.isEmpty(pNodeValue)) { kk++; } else { ccc++; break; } } else { ccc++; break; } if (kk == pChilds.getLength()) { Node parentBrNode = p.getParentNode(); parentBrNode.removeChild(p); } } } // removing firstHeading element from document Element firstHeadingElement = document.getElementById("firstHeading"); if (firstHeadingElement != null) { CURRENT_OPERATOR_NAME_READ_FROM_RAPIDWIKI = firstHeadingElement.getFirstChild().getNodeValue() .replaceFirst(".*:", ""); 
firstHeadingElement.getParentNode().removeChild(firstHeadingElement); } // setting operator plugin name if (opDesc != null && opDesc.getProvider() != null) { CURRENT_OPERATOR_PLUGIN_NAME = opDesc.getProvider().getName(); } // removing sitesub element from document Element siteSubElement = document.getElementById("siteSub"); if (siteSubElement != null) { siteSubElement.getParentNode().removeChild(siteSubElement); } // removing contentSub element from document Element contentSubElement = document.getElementById("contentSub"); if (contentSubElement != null) { contentSubElement.getParentNode().removeChild(contentSubElement); } // removing catlinks element from document Element catlinksElement = document.getElementById("catlinks"); if (catlinksElement != null) { catlinksElement.getParentNode().removeChild(catlinksElement); } // removing <a...> element from document, if they are empty NodeList aList = document.getElementsByTagName("a"); if (aList != null) { int k = 0; while (k < aList.getLength()) { Node a = aList.item(k); Element aElement = (Element) a; if (aElement.getAttribute("class").equals("internal")) { a.getParentNode().removeChild(a); } else { Node aChild = a.getFirstChild(); if (aChild != null && (aChild.getNodeValue() != null && aChild.getNodeType() == Node.TEXT_NODE && StringUtils.isNotBlank(aChild.getNodeValue()) && StringUtils.isNotEmpty(aChild.getNodeValue()) || aChild.getNodeName() != null)) { Element aChildElement = null; if (aChild.getNodeName().startsWith("img")) { aChildElement = (Element) aChild; Element imgElement = document.createElement("img"); imgElement.setAttribute("alt", aChildElement.getAttribute("alt")); imgElement.setAttribute("class", aChildElement.getAttribute("class")); imgElement.setAttribute("height", aChildElement.getAttribute("height")); imgElement.setAttribute("src", WIKI_PREFIX_FOR_IMAGES + aChildElement.getAttribute("src")); imgElement.setAttribute("width", aChildElement.getAttribute("width")); imgElement.setAttribute("border", 
"1"); Node aParent = a.getParentNode(); aParent.replaceChild(imgElement, a); } else { k++; } } else { a.getParentNode().removeChild(a); } } } } } } return document; }
From source file:com.bstek.dorado.core.xml.XercesXmlDocumentBuilder.java
/**
 * Creates a DOM parser that leaves comments and ignorable element-content whitespace out of
 * the parsed tree.
 *
 * @return a new builder obtained from a freshly configured factory
 * @throws ParserConfigurationException if no builder with this configuration can be created
 */
protected DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
    final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setIgnoringComments(true);
    builderFactory.setIgnoringElementContentWhitespace(true);
    final DocumentBuilder builder = builderFactory.newDocumentBuilder();
    return builder;
}
From source file:net.mumie.coursecreator.xml.ELClassListWrapper.java
private ELClassListWrapper() throws ParserConfigurationException { DocumentBuilderFactory fac = DocumentBuilderFactory.newInstance(); fac.setIgnoringComments(true); fac.setNamespaceAware(true);/*from ww w . j av a 2 s . co m*/ fac.setValidating(false); this.domBuilder = fac.newDocumentBuilder(); }
From source file:com.amalto.core.save.DOMDocumentTest.java
public void testIncludeXSINamespace() throws Exception { String lineSeparator = System.getProperty("line.separator"); StringBuilder xmlBuilder = new StringBuilder( "<Organisation xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"); xmlBuilder.append(lineSeparator);//from w w w .ja va 2s. c o m xmlBuilder.append("<IdOrganisation xsi:type=\"xsd:string\">5797</IdOrganisation>"); xmlBuilder.append(lineSeparator); xmlBuilder.append("</Organisation>"); xmlBuilder.append(lineSeparator); String xml = xmlBuilder.toString(); InputStream documentStream = new ByteArrayInputStream(xml.getBytes("UTF-8")); // Parsing MutableDocument userDocument; DocumentBuilderFactory DOM_PARSER_FACTORY = DocumentBuilderFactory.newInstance(); DOM_PARSER_FACTORY.setNamespaceAware(true); DOM_PARSER_FACTORY.setIgnoringComments(true); DOM_PARSER_FACTORY.setValidating(false); try { // Don't ignore talend internal attributes when parsing this document DocumentBuilder documentBuilder = new SkipAttributeDocumentBuilder( DOM_PARSER_FACTORY.newDocumentBuilder(), false); InputSource source = new InputSource(documentStream); Document userDomDocument = documentBuilder.parse(source); userDocument = new DOMDocument(userDomDocument, null, StringUtils.EMPTY, StringUtils.EMPTY); } catch (Exception e) { throw new RuntimeException("Unable to parse document to save.", e); } assertNotNull(userDocument); String result = userDocument.exportToString(); assertEquals(xml, result); }
From source file:com.icloud.framework.http.domain.DomainSuffixesReader.java
void read(DomainSuffixes tldEntries, InputStream input) throws IOException { try {//from w w w . j a va 2s. c o m DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringComments(true); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(new InputSource(input)); Element root = document.getDocumentElement(); if (root != null && root.getTagName().equals("domains")) { Element tlds = (Element) root.getElementsByTagName("tlds").item(0); Element suffixes = (Element) root.getElementsByTagName("suffixes").item(0); //read tlds readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds").item(0)); readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds").item(0)); readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds").item(0)); readSuffixes(tldEntries, suffixes); } else { throw new IOException("xml file is not valid"); } } catch (ParserConfigurationException ex) { LOG.warn(TZUtil.stringifyException(ex)); throw new IOException(ex.getMessage()); } catch (SAXException ex) { LOG.warn(TZUtil.stringifyException(ex)); throw new IOException(ex.getMessage()); } }
From source file:com.iflytek.spider.util.domain.DomainSuffixesReader.java
void read(DomainSuffixes tldEntries, InputStream input) throws IOException { try {//from w w w .j av a2 s .co m DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringComments(true); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(new InputSource(input)); Element root = document.getDocumentElement(); if (root != null && root.getTagName().equals("domains")) { Element tlds = (Element) root.getElementsByTagName("tlds").item(0); Element suffixes = (Element) root.getElementsByTagName("suffixes").item(0); //read tlds readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds").item(0)); readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds").item(0)); readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds").item(0)); readSuffixes(tldEntries, suffixes); } else { throw new IOException("xml file is not valid"); } } catch (ParserConfigurationException ex) { LOG.warn(StringUtils.stringifyException(ex)); throw new IOException(ex.getMessage()); } catch (SAXException ex) { LOG.warn(StringUtils.stringifyException(ex)); throw new IOException(ex.getMessage()); } }
From source file:com.stratio.decision.service.SolrOperationsService.java
public void createSolrSchema(List<ColumnNameTypeValue> columns, String confpath) throws ParserConfigurationException, URISyntaxException, IOException, SAXException, TransformerException {// w w w . j a v a2 s . co m DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); domFactory.setIgnoringComments(true); DocumentBuilder builder = domFactory.newDocumentBuilder(); Document doc = builder.parse(new File(ClassLoader.getSystemResource("./solr-config/schema.xml").toURI())); NodeList nodes = doc.getElementsByTagName("schema"); for (ColumnNameTypeValue column : columns) { Element field = doc.createElement("field"); field.setAttribute("name", column.getColumn()); field.setAttribute("type", streamingToSolr(column.getType())); field.setAttribute("indexed", "true"); field.setAttribute("stored", "true"); nodes.item(0).appendChild(field); } TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); DOMSource source = new DOMSource(doc); StreamResult streamResult = new StreamResult(new File(confpath + "/schema.xml")); transformer.transform(source, streamResult); }
From source file:com.wudaosoft.net.httpclient.XmlResponseHandler.java
@Override public XmlObject handleResponse(HttpResponse response) throws ClientProtocolException, IOException { int status = response.getStatusLine().getStatusCode(); if (status < 200 || status >= 300) { throw new ClientProtocolException("Unexpected response status: " + status); }/*from ww w.j a va2s . c o m*/ HttpEntity entity = response.getEntity(); if (entity == null) { throw new ClientProtocolException("Response contains no content"); } DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); dbfac.setIgnoringElementContentWhitespace(true); dbfac.setCoalescing(true); dbfac.setIgnoringComments(true); try { DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); ContentType contentType = ContentType.getOrDefault(entity); // if (!contentType.equals(ContentType.APPLICATION_XML)) { // throw new ClientProtocolException("Unexpected content type:" + // contentType); // } Charset charset = contentType.getCharset(); if (charset == null) { charset = Consts.UTF_8; } return XmlObject.fromDocument(docBuilder.parse(entity.getContent(), charset.name())); } catch (ParserConfigurationException ex) { throw new IllegalStateException(ex); } catch (SAXException ex) { throw new ClientProtocolException("Malformed XML document", ex); } }
From source file:com.l2jfree.gameserver.datatables.SummonItemsData.java
/**
 * Loads the summon item definitions from {@code data/summon_items.xml} below
 * {@code Config.DATAPACK_ROOT}.
 * <p>
 * Every {@code <item id="...">} inside a {@code <list>} element becomes one
 * {@code L2SummonItem}; its NPC id and summon type are read from the {@code val} attribute
 * of the nested {@code <npcId>} and {@code <summonType>} elements and default to 0 when the
 * element is absent. Problems while reading or parsing are logged as warnings; the entries
 * read up to that point are kept, and the id array is always built from them.
 */
private SummonItemsData() {
    _summonitems = new FastMap<Integer, L2SummonItem>();
    File file = new File(Config.DATAPACK_ROOT, "data/summon_items.xml");
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(true);
        factory.setIgnoringComments(true);
        Document doc = factory.newDocumentBuilder().parse(file);

        for (Node n = doc.getFirstChild(); n != null; n = n.getNextSibling()) {
            if (!"list".equalsIgnoreCase(n.getNodeName())) {
                continue;
            }
            for (Node d = n.getFirstChild(); d != null; d = d.getNextSibling()) {
                if (!"item".equalsIgnoreCase(d.getNodeName())) {
                    continue;
                }
                Node a = d.getAttributes().getNamedItem("id");
                if (a == null) {
                    throw new IllegalStateException("Error in summon item defenition!");
                }
                int itemID = Integer.parseInt(a.getNodeValue());

                // Declared per item so that values of a previous item cannot leak into an
                // item that lacks the corresponding child element.
                int npcID = 0;
                byte summonType = 0;
                for (Node e = d.getFirstChild(); e != null; e = e.getNextSibling()) {
                    if ("npcId".equalsIgnoreCase(e.getNodeName())) {
                        a = e.getAttributes().getNamedItem("val");
                        if (a == null) {
                            throw new IllegalStateException(
                                    "Not defined npc id for summon item id=" + itemID + "!");
                        }
                        npcID = Integer.parseInt(a.getNodeValue());
                    } else if ("summonType".equalsIgnoreCase(e.getNodeName())) {
                        a = e.getAttributes().getNamedItem("val");
                        if (a == null) {
                            throw new IllegalStateException(
                                    "Not defined summon type for summon item id=" + itemID + "!");
                        }
                        summonType = Byte.parseByte(a.getNodeValue());
                    }
                }
                _summonitems.put(itemID, new L2SummonItem(itemID, npcID, summonType));
            }
        }
    } catch (IOException e) {
        _log.warn("SummonItemsData: Can not find " + file.getAbsolutePath() + " !", e);
    } catch (Exception e) {
        _log.warn("SummonItemsData: Error while parsing " + file.getAbsolutePath() + " !", e);
    }

    // Built outside the try block so the array is assigned even when loading failed.
    _summonItemIds = new int[_summonitems.size()];
    int i = 0;
    for (int itemId : _summonitems.keySet()) {
        _summonItemIds[i++] = itemId;
    }

    _log.info("SummonItemsData: Loaded " + _summonitems.size() + " Summon Items from " + file.getName());
}