List of usage examples for javax.xml.parsers DocumentBuilder setEntityResolver
public abstract void setEntityResolver(EntityResolver er);
From source file:com.rapidminer.gui.OperatorDocLoader.java
/** * //from w w w . ja v a 2 s. c om * @param operatorWikiName * @param opDesc * @return The parsed <tt>Document</tt> (not finally parsed) of the selected operator. * @throws MalformedURLException * @throws ParserConfigurationException */ private static Document parseDocumentForOperator(String operatorWikiName, OperatorDescription opDesc) throws MalformedURLException, ParserConfigurationException { DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); builderFactory.setIgnoringComments(true); builderFactory.setIgnoringElementContentWhitespace(true); DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder(); documentBuilder.setEntityResolver(new XHTMLEntityResolver()); Document document = null; URL url = new URL(WIKI_PREFIX_FOR_OPERATORS + operatorWikiName); if (url != null) { try { document = documentBuilder.parse(WebServiceTools.openStreamFromURL(url)); } catch (IOException e) { logger.warning("Could not open " + url.toExternalForm() + ": " + e.getMessage()); } catch (SAXException e) { logger.warning("Could not parse operator documentation: " + e.getMessage()); } int i = 0; if (document != null) { Element contentElement = document.getElementById("content"); // removing content element from document if (contentElement != null) { contentElement.getParentNode().removeChild(contentElement); } // removing everything from body NodeList bodies = document.getElementsByTagName("body"); for (int k = 0; k < bodies.getLength(); k++) { Node body = bodies.item(k); while (body.hasChildNodes()) { body.removeChild(body.getFirstChild()); } // read content element to body if (contentElement != null && k == 0) { body.appendChild(contentElement); } } // removing everything from head NodeList heads = document.getElementsByTagName("head"); for (int k = 0; k < heads.getLength(); k++) { Node head = heads.item(k); while (head.hasChildNodes()) { head.removeChild(head.getFirstChild()); } } // removing...<head/> from document if (heads != null) { while (i 
< heads.getLength()) { Node head = heads.item(i); head.getParentNode().removeChild(head); } } // removing jump-to-nav element from document Element jumpToNavElement = document.getElementById("jump-to-nav"); if (jumpToNavElement != null) { jumpToNavElement.getParentNode().removeChild(jumpToNavElement); } // removing mw-normal-catlinks element from document Element mwNormalCatlinksElement = document.getElementById("mw-normal-catlinks"); if (mwNormalCatlinksElement != null) { mwNormalCatlinksElement.getParentNode().removeChild(mwNormalCatlinksElement); } // removing complete link navigation Element tocElement = document.getElementById("toc"); if (tocElement != null) { tocElement.getParentNode().removeChild(tocElement); } // removing everything from class printfooter NodeList nodeListDiv = document.getElementsByTagName("div"); for (int k = 0; k < nodeListDiv.getLength(); k++) { Element div = (Element) nodeListDiv.item(k); if (div.getAttribute("class").equals("printfooter")) { div.getParentNode().removeChild(div); } } // removing everything from class editsection NodeList spanList = document.getElementsByTagName("span"); for (int k = 0; k < spanList.getLength(); k++) { Element span = (Element) spanList.item(k); if (span.getAttribute("class").equals("editsection")) { span.getParentNode().removeChild(span); } } // Synopsis Header boolean doIt = true; NodeList pList = document.getElementsByTagName("p"); for (int k = 0; k < pList.getLength(); k++) { if (doIt) { Node p = pList.item(k); NodeList pChildList = p.getChildNodes(); for (int j = 0; j < pChildList.getLength(); j++) { Node pChild = pChildList.item(j); if (pChild.getNodeType() == Node.TEXT_NODE && pChild.getNodeValue() != null && StringUtils.isNotBlank(pChild.getNodeValue()) && StringUtils.isNotEmpty(pChild.getNodeValue())) { String pChildString = pChild.getNodeValue(); Element newPWithoutSpaces = document.createElement("p"); newPWithoutSpaces.setTextContent(pChildString); Node synopsis = 
document.createTextNode("Synopsis"); Element span = document.createElement("span"); span.setAttribute("class", "mw-headline"); span.setAttribute("id", "Synopsis"); span.appendChild(synopsis); Element h2 = document.createElement("h2"); h2.appendChild(span); Element div = document.createElement("div"); div.setAttribute("id", "synopsis"); div.appendChild(h2); div.appendChild(newPWithoutSpaces); Node pChildParentParent = pChild.getParentNode().getParentNode(); Node pChildParent = pChild.getParentNode(); pChildParentParent.replaceChild(div, pChildParent); doIt = false; break; } } } else { break; } } // removing all <br...>-Tags NodeList brList = document.getElementsByTagName("br"); while (i < brList.getLength()) { Node br = brList.item(i); Node parentBrNode = br.getParentNode(); parentBrNode.removeChild(br); } // removing everything from script NodeList scriptList = document.getElementsByTagName("script"); while (i < scriptList.getLength()) { Node scriptNode = scriptList.item(i); Node parentNode = scriptNode.getParentNode(); parentNode.removeChild(scriptNode); } // removing all empty <p...>-Tags NodeList pList2 = document.getElementsByTagName("p"); int ccc = 0; while (ccc < pList2.getLength()) { Node p = pList2.item(ccc); NodeList pChilds = p.getChildNodes(); int kk = 0; while (kk < pChilds.getLength()) { Node pChild = pChilds.item(kk); if (pChild.getNodeType() == Node.TEXT_NODE) { String pNodeValue = pChild.getNodeValue(); if (pNodeValue == null || StringUtils.isBlank(pNodeValue) || StringUtils.isEmpty(pNodeValue)) { kk++; } else { ccc++; break; } } else { ccc++; break; } if (kk == pChilds.getLength()) { Node parentBrNode = p.getParentNode(); parentBrNode.removeChild(p); } } } // removing firstHeading element from document Element firstHeadingElement = document.getElementById("firstHeading"); if (firstHeadingElement != null) { CURRENT_OPERATOR_NAME_READ_FROM_RAPIDWIKI = firstHeadingElement.getFirstChild().getNodeValue() .replaceFirst(".*:", ""); 
firstHeadingElement.getParentNode().removeChild(firstHeadingElement); } // setting operator plugin name if (opDesc != null && opDesc.getProvider() != null) { CURRENT_OPERATOR_PLUGIN_NAME = opDesc.getProvider().getName(); } // removing sitesub element from document Element siteSubElement = document.getElementById("siteSub"); if (siteSubElement != null) { siteSubElement.getParentNode().removeChild(siteSubElement); } // removing contentSub element from document Element contentSubElement = document.getElementById("contentSub"); if (contentSubElement != null) { contentSubElement.getParentNode().removeChild(contentSubElement); } // removing catlinks element from document Element catlinksElement = document.getElementById("catlinks"); if (catlinksElement != null) { catlinksElement.getParentNode().removeChild(catlinksElement); } // removing <a...> element from document, if they are empty NodeList aList = document.getElementsByTagName("a"); if (aList != null) { int k = 0; while (k < aList.getLength()) { Node a = aList.item(k); Element aElement = (Element) a; if (aElement.getAttribute("class").equals("internal")) { a.getParentNode().removeChild(a); } else { Node aChild = a.getFirstChild(); if (aChild != null && (aChild.getNodeValue() != null && aChild.getNodeType() == Node.TEXT_NODE && StringUtils.isNotBlank(aChild.getNodeValue()) && StringUtils.isNotEmpty(aChild.getNodeValue()) || aChild.getNodeName() != null)) { Element aChildElement = null; if (aChild.getNodeName().startsWith("img")) { aChildElement = (Element) aChild; Element imgElement = document.createElement("img"); imgElement.setAttribute("alt", aChildElement.getAttribute("alt")); imgElement.setAttribute("class", aChildElement.getAttribute("class")); imgElement.setAttribute("height", aChildElement.getAttribute("height")); imgElement.setAttribute("src", WIKI_PREFIX_FOR_IMAGES + aChildElement.getAttribute("src")); imgElement.setAttribute("width", aChildElement.getAttribute("width")); imgElement.setAttribute("border", 
"1"); Node aParent = a.getParentNode(); aParent.replaceChild(imgElement, a); } else { k++; } } else { a.getParentNode().removeChild(a); } } } } } } return document; }
From source file:com.ikon.util.FormUtils.java
/** * Parse params.xml definitions// ww w . j a v a 2 s. c om * * @return A List parameter elements. */ public static List<FormElement> parseReportParameters(InputStream is) throws ParseException { log.debug("parseReportParameters({})", is); List<FormElement> params = new ArrayList<FormElement>(); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setValidating(true); ErrorHandler handler = new ErrorHandler(); // EntityResolver resolver = new LocalResolver(Config.DTD_BASE); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(handler); db.setEntityResolver(resolver); if (is != null) { Document doc = db.parse(is); doc.getDocumentElement().normalize(); NodeList nlForm = doc.getElementsByTagName("report-parameters"); for (int i = 0; i < nlForm.getLength(); i++) { Node nForm = nlForm.item(i); if (nForm.getNodeType() == Node.ELEMENT_NODE) { NodeList nlField = nForm.getChildNodes(); params = parseField(nlField); } } } } catch (ParserConfigurationException e) { throw new ParseException(e.getMessage(), e); } catch (SAXException e) { throw new ParseException(e.getMessage(), e); } catch (IOException e) { throw new ParseException(e.getMessage(), e); } log.debug("parseReportParameters: {}", params); return params; }
From source file:com.ikon.util.FormUtils.java
/** * Parse form.xml definitions// w w w .java 2 s .co m * * @return A Map with all the forms and its form elements. */ public static Map<String, List<FormElement>> parseWorkflowForms(InputStream is) throws ParseException { log.debug("parseWorkflowForms({})", is); Map<String, List<FormElement>> forms = new HashMap<String, List<FormElement>>(); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setValidating(true); ErrorHandler handler = new ErrorHandler(); // EntityResolver resolver = new LocalResolver(Config.DTD_BASE); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(handler); db.setEntityResolver(resolver); if (is != null) { Document doc = db.parse(is); doc.getDocumentElement().normalize(); NodeList nlForm = doc.getElementsByTagName("workflow-form"); for (int i = 0; i < nlForm.getLength(); i++) { Node nForm = nlForm.item(i); if (nForm.getNodeType() == Node.ELEMENT_NODE) { String taskName = nForm.getAttributes().getNamedItem("task").getNodeValue(); NodeList nlField = nForm.getChildNodes(); List<FormElement> fe = parseField(nlField); forms.put(taskName, fe); } } } } catch (ParserConfigurationException e) { throw new ParseException(e.getMessage(), e); } catch (SAXException e) { throw new ParseException(e.getMessage(), e); } catch (IOException e) { throw new ParseException(e.getMessage(), e); } log.debug("parseWorkflowForms: {}", forms); return forms; }
From source file:com.ikon.util.FormUtils.java
/** * Parse PropertyGroups.xml definitions/*from w w w. j ava 2s . c o m*/ * * @return A Map with all the forms and its form elements. */ public static synchronized Map<PropertyGroup, List<FormElement>> parsePropertyGroupsForms(String pgForm) throws IOException, ParseException { log.debug("parsePropertyGroupsForms({})", pgForm); if (pGroups == null) { pGroups = new HashMap<PropertyGroup, List<FormElement>>(); FileInputStream fis = null; try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setValidating(true); ErrorHandler handler = new ErrorHandler(); // EntityResolver resolver = new LocalResolver(Config.DTD_BASE); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(handler); db.setEntityResolver(resolver); fis = new FileInputStream(pgForm); if (fis != null) { Document doc = db.parse(fis); doc.getDocumentElement().normalize(); NodeList nlForm = doc.getElementsByTagName("property-group"); for (int i = 0; i < nlForm.getLength(); i++) { Node nForm = nlForm.item(i); if (nForm.getNodeType() == Node.ELEMENT_NODE) { PropertyGroup pg = new PropertyGroup(); Node item = nForm.getAttributes().getNamedItem("label"); if (item != null) pg.setLabel(item.getNodeValue()); item = nForm.getAttributes().getNamedItem("name"); if (item != null) pg.setName(item.getNodeValue()); item = nForm.getAttributes().getNamedItem("visible"); if (item != null) pg.setVisible(Boolean.valueOf(item.getNodeValue())); item = nForm.getAttributes().getNamedItem("readonly"); if (item != null) pg.setReadonly(Boolean.valueOf(item.getNodeValue())); NodeList nlField = nForm.getChildNodes(); List<FormElement> fe = parseField(nlField); pGroups.put(pg, fe); } } } } catch (ParserConfigurationException e) { throw new ParseException(e.getMessage()); } catch (SAXException e) { throw new ParseException(e.getMessage()); } catch (IOException e) { throw e; } finally { IOUtils.closeQuietly(fis); } } log.debug("parsePropertyGroupsForms: {}", pGroups); return 
clonedPropertyGroups(); }
From source file:com.openkm.util.FormUtils.java
/** * Parse params.xml definitions// w w w . j a v a 2 s. c o m * * @return A List parameter elements. */ public static List<FormElement> parseReportParameters(InputStream is) throws ParseException { log.debug("parseReportParameters({})", is); long begin = System.currentTimeMillis(); List<FormElement> params = new ArrayList<FormElement>(); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setValidating(true); ErrorHandler handler = new ErrorHandler(); // EntityResolver resolver = new LocalResolver(Config.DTD_BASE); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(handler); db.setEntityResolver(resolver); if (is != null) { Document doc = db.parse(is); doc.getDocumentElement().normalize(); NodeList nlForm = doc.getElementsByTagName("report-parameters"); for (int i = 0; i < nlForm.getLength(); i++) { Node nForm = nlForm.item(i); if (nForm.getNodeType() == Node.ELEMENT_NODE) { NodeList nlField = nForm.getChildNodes(); params = parseField(nlField); } } } } catch (ParserConfigurationException e) { throw new ParseException(e.getMessage(), e); } catch (SAXException e) { throw new ParseException(e.getMessage(), e); } catch (IOException e) { throw new ParseException(e.getMessage(), e); } log.trace("parseReportParameters.Time: {}", System.currentTimeMillis() - begin); log.debug("parseReportParameters: {}", params); return params; }
From source file:com.openkm.util.FormUtils.java
/** * Parse form.xml definitions//from w w w . j a va 2 s.c om * * @return A Map with all the forms and its form elements. */ public static Map<String, List<FormElement>> parseWorkflowForms(InputStream is) throws ParseException { log.debug("parseWorkflowForms({})", is); long begin = System.currentTimeMillis(); Map<String, List<FormElement>> forms = new HashMap<String, List<FormElement>>(); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setValidating(true); ErrorHandler handler = new ErrorHandler(); // EntityResolver resolver = new LocalResolver(Config.DTD_BASE); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(handler); db.setEntityResolver(resolver); if (is != null) { Document doc = db.parse(is); doc.getDocumentElement().normalize(); NodeList nlForm = doc.getElementsByTagName("workflow-form"); for (int i = 0; i < nlForm.getLength(); i++) { Node nForm = nlForm.item(i); if (nForm.getNodeType() == Node.ELEMENT_NODE) { String taskName = nForm.getAttributes().getNamedItem("task").getNodeValue(); NodeList nlField = nForm.getChildNodes(); List<FormElement> fe = parseField(nlField); forms.put(taskName, fe); } } } } catch (ParserConfigurationException e) { throw new ParseException(e.getMessage(), e); } catch (SAXException e) { throw new ParseException(e.getMessage(), e); } catch (IOException e) { throw new ParseException(e.getMessage(), e); } log.trace("parseWorkflowForms.Time: {}", System.currentTimeMillis() - begin); log.debug("parseWorkflowForms: {}", forms); return forms; }
From source file:channellistmaker.listmaker.XMLLoader.java
/** * XML?/*ww w. j a va 2 s. c o m*/ * * @param F XML * @return XML???Document * @author dosdiaopfhj */ public synchronized Document Load(File F) { try { LOG.info(" = " + F + " = " + getCharset() + " ??"); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder documentBuilder = factory.newDocumentBuilder(); documentBuilder.setEntityResolver(new XmlTvDtdResolver()); Document document = documentBuilder .parse(new InputSource(new InputStreamReader(new FileInputStream(F), getCharset()))); // Element root = document.getDocumentElement(); LOG.info(" = " + F + " = " + getCharset() + " ??"); return document; } catch (ParserConfigurationException | UnsupportedEncodingException | FileNotFoundException ex) { LOG.fatal("", ex); return null; } catch (SAXException | IOException ex) { LOG.fatal("", ex); return null; } }
From source file:com.openkm.util.FormUtils.java
/** * Parse PropertyGroups.xml definitions// www. ja va 2s . c om * * @param pgDefFile Path to file where is the Property Groups definition. * @return A Map with all the forms and its form elements. */ public static synchronized Map<PropertyGroup, List<FormElement>> parsePropertyGroupsForms(String pgDefFile) throws IOException, ParseException { log.debug("parsePropertyGroupsForms({})", pgDefFile); if (pGroups == null) { long begin = System.currentTimeMillis(); pGroups = new HashMap<PropertyGroup, List<FormElement>>(); FileInputStream fis = null; try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setValidating(true); ErrorHandler handler = new ErrorHandler(); // EntityResolver resolver = new LocalResolver(Config.DTD_BASE); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(handler); db.setEntityResolver(resolver); fis = new FileInputStream(pgDefFile); if (fis != null) { Document doc = db.parse(fis); doc.getDocumentElement().normalize(); NodeList nlForm = doc.getElementsByTagName("property-group"); for (int i = 0; i < nlForm.getLength(); i++) { Node nForm = nlForm.item(i); if (nForm.getNodeType() == Node.ELEMENT_NODE) { PropertyGroup pg = new PropertyGroup(); Node item = nForm.getAttributes().getNamedItem("label"); if (item != null) pg.setLabel(item.getNodeValue()); item = nForm.getAttributes().getNamedItem("name"); if (item != null) pg.setName(item.getNodeValue()); item = nForm.getAttributes().getNamedItem("visible"); if (item != null) pg.setVisible(Boolean.valueOf(item.getNodeValue())); item = nForm.getAttributes().getNamedItem("readonly"); if (item != null) pg.setReadonly(Boolean.valueOf(item.getNodeValue())); NodeList nlField = nForm.getChildNodes(); List<FormElement> fe = parseField(nlField); pGroups.put(pg, fe); } } } } catch (ParserConfigurationException e) { throw new ParseException(e.getMessage()); } catch (SAXException e) { throw new ParseException(e.getMessage()); } catch (IOException e) 
{ throw e; } finally { IOUtils.closeQuietly(fis); } log.trace("parsePropertyGroupsForms.Time: {}", System.currentTimeMillis() - begin); } log.debug("parsePropertyGroupsForms: {}", pGroups); return clonedPropertyGroups(); }
From source file:edu.internet2.middleware.shibboleth.common.config.SpringDocumentLoader.java
/**
 * {@inheritDoc}
 *
 * <p>This implementation does not use the supplied entity resolver, error handler, validation
 * mode or namespace flag: it always builds a validating, namespace-aware, coalescing,
 * comment-ignoring parser with the XML Schema language set, reports errors through a
 * {@code LoggingErrorHandler} and resolves entities through a {@code ClasspathResolver}.
 */
public Document loadDocument(InputSource inputSource, EntityResolver entityResolver, ErrorHandler errorHandler,
        int validationMode, boolean namespaceAware) throws Exception {
    DocumentBuilderFactory schemaAwareFactory = DocumentBuilderFactory.newInstance();
    schemaAwareFactory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage",
            "http://www.w3.org/2001/XMLSchema");
    schemaAwareFactory.setValidating(true);
    schemaAwareFactory.setNamespaceAware(true);
    schemaAwareFactory.setIgnoringComments(true);
    schemaAwareFactory.setCoalescing(true);

    DocumentBuilder parser = schemaAwareFactory.newDocumentBuilder();
    parser.setErrorHandler(new LoggingErrorHandler(log));
    parser.setEntityResolver(new ClasspathResolver());

    Document parsed = parser.parse(inputSource);
    return parsed;
}
From source file:com.idiominc.ws.opentopic.fo.i18n.PreprocessorTask.java
@Override public void execute() throws BuildException { checkParameters();// w w w. j a va 2s . co m log("Processing " + input + " to " + output, Project.MSG_INFO); OutputStream out = null; try { final DocumentBuilder documentBuilder = XMLUtils.getDocumentBuilder(); documentBuilder.setEntityResolver(xmlcatalog); final Document doc = documentBuilder.parse(input); final Document conf = documentBuilder.parse(config); final MultilanguagePreprocessor preprocessor = new MultilanguagePreprocessor(new Configuration(conf)); final Document document = preprocessor.process(doc); final TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setURIResolver(xmlcatalog); final Transformer transformer; if (style != null) { log("Loading stylesheet " + style, Project.MSG_INFO); transformer = transformerFactory.newTransformer(new StreamSource(style)); } else { transformer = transformerFactory.newTransformer(); } transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); transformer.setOutputProperty(OutputKeys.INDENT, "no"); transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); if (doc.getDoctype() != null) { transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, doc.getDoctype().getPublicId()); transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, doc.getDoctype().getSystemId()); } out = new FileOutputStream(output); final StreamResult streamResult = new StreamResult(out); transformer.transform(new DOMSource(document), streamResult); } catch (final RuntimeException e) { throw e; } catch (final Exception e) { throw new BuildException(e); } finally { IOUtils.closeQuietly(out); } }