List of usage examples for javax.xml.parsers DocumentBuilderFactory setIgnoringComments
public void setIgnoringComments(boolean ignoreComments)
From source file:org.dspace.submit.lookup.PubmedService.java
public List<Record> getByPubmedIDs(List<String> pubmedIDs) throws HttpException, IOException, ParserConfigurationException, SAXException { List<Record> results = new ArrayList<Record>(); HttpGet method = null;//from w ww.j ava 2 s . co m try { HttpClient client = new DefaultHttpClient(); client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 5 * timeout); try { URIBuilder uriBuilder = new URIBuilder("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"); uriBuilder.addParameter("db", "pubmed"); uriBuilder.addParameter("retmode", "xml"); uriBuilder.addParameter("rettype", "full"); uriBuilder.addParameter("id", StringUtils.join(pubmedIDs.iterator(), ",")); method = new HttpGet(uriBuilder.build()); } catch (URISyntaxException ex) { throw new RuntimeException("Request not sent", ex); } // Execute the method. HttpResponse response = client.execute(method); StatusLine statusLine = response.getStatusLine(); int statusCode = statusLine.getStatusCode(); if (statusCode != HttpStatus.SC_OK) { throw new RuntimeException("WS call failed: " + statusLine); } DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); DocumentBuilder builder = factory.newDocumentBuilder(); Document inDoc = builder.parse(response.getEntity().getContent()); Element xmlRoot = inDoc.getDocumentElement(); List<Element> pubArticles = XMLUtils.getElementList(xmlRoot, "PubmedArticle"); for (Element xmlArticle : pubArticles) { Record pubmedItem = null; try { pubmedItem = PubmedUtils.convertPubmedDomToRecord(xmlArticle); results.add(pubmedItem); } catch (Exception e) { throw new RuntimeException("PubmedID is not valid or not exist: " + e.getMessage(), e); } } return results; } finally { if (method != null) { method.releaseConnection(); } } }
From source file:com.panet.imeta.core.xml.XMLHandler.java
/** * Load a file into an XML document// w w w .j a va2 s. co m * * @param filename * The filename to load into a document * @param systemId * Provide a base for resolving relative URIs. * @param ignoreEntities * Ignores external entities and returns an empty dummy. * @param namespaceAware * support XML namespaces. * @return the Document if all went well, null if an error occured! */ public static final Document loadXMLFile(FileObject fileObject, String systemID, boolean ignoreEntities, boolean namespaceAware) throws KettleXMLException { DocumentBuilderFactory dbf; DocumentBuilder db; Document doc; try { // Check and open XML document dbf = DocumentBuilderFactory.newInstance(); dbf.setIgnoringComments(true); dbf.setNamespaceAware(namespaceAware); db = dbf.newDocumentBuilder(); // even dbf.setValidating(false) will the parser NOT prevent from // checking the existance of the DTD // thus we need to give the BaseURI (systemID) below to have a // chance to get it // or return empty dummy documents for all external entities // (sources) if (ignoreEntities) db.setEntityResolver(new DTDIgnoringEntityResolver()); InputStream inputStream = null; try { if (Const.isEmpty(systemID)) { // Normal parsing // inputStream = KettleVFS.getInputStream(fileObject); doc = db.parse(inputStream); } else { // Do extra verifications // String systemIDwithEndingSlash = systemID.trim(); // make sure we have an ending slash, otherwise the last // part will be ignored if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) { systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/"); } inputStream = KettleVFS.getInputStream(fileObject); doc = db.parse(inputStream, systemIDwithEndingSlash); } } catch (FileNotFoundException ef) { throw new KettleXMLException(ef); } finally { if (inputStream != null) inputStream.close(); } return doc; } catch (Exception e) { throw new KettleXMLException("Error reading information from file", e); } }
From source file:org.dspace.submit.lookup.PubmedService.java
/**
 * Searches PubMed for the given term and returns the matching records.
 * <p>
 * Unless the {@code remoteservice.demo} property of the submission-lookup module is
 * set, the NCBI E-utilities {@code esearch} endpoint is queried (at most 10 ids are
 * requested) and the ids found are resolved through
 * {@link #getByPubmedIDs(List)}. Any failure in this mode is logged and an empty
 * list is returned. In demo mode the ids are read from
 * {@code config/crosswalks/demo/pubmed-search.xml} below {@code dspace.dir}, and a
 * failure is rethrown as a {@link RuntimeException}.
 *
 * @param query the search term sent to PubMed
 * @return the records found; empty when nothing matched or the remote lookup failed
 * @throws IOException   declared in the signature (kept for backward compatibility)
 * @throws HttpException declared in the signature (kept for backward compatibility)
 */
public List<Record> search(String query) throws IOException, HttpException {
    List<Record> results = new ArrayList<>();
    if (!ConfigurationManager.getBooleanProperty(SubmissionLookupService.CFG_MODULE, "remoteservice.demo")) {
        HttpClient client = new DefaultHttpClient();
        HttpGet method = null;
        try {
            client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout);

            URIBuilder uriBuilder = new URIBuilder("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi");
            uriBuilder.addParameter("db", "pubmed");
            uriBuilder.addParameter("datetype", "edat");
            uriBuilder.addParameter("retmax", "10");
            uriBuilder.addParameter("term", query);
            method = new HttpGet(uriBuilder.build());

            // Execute the method.
            HttpResponse response = client.execute(method);
            StatusLine statusLine = response.getStatusLine();
            if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
                throw new RuntimeException("WS call failed: " + statusLine);
            }

            List<String> pubmedIDs = readPubmedIdsFromSearchResult(response.getEntity().getContent());
            if (pubmedIDs != null && !pubmedIDs.isEmpty()) {
                results = getByPubmedIDs(pubmedIDs);
            }
        } catch (Exception e1) {
            // Remote lookups are best effort: log and fall through to the empty result.
            log.error(e1.getMessage(), e1);
        } finally {
            if (method != null) {
                method.releaseConnection();
            }
            // The client is created per call, so release its connection manager too.
            client.getConnectionManager().shutdown();
        }
    } else {
        InputStream stream = null;
        try {
            File file = new File(ConfigurationManager.getProperty("dspace.dir")
                    + "/config/crosswalks/demo/pubmed-search.xml");
            stream = new FileInputStream(file);

            List<String> pubmedIDs = readPubmedIdsFromSearchResult(stream);
            if (pubmedIDs != null && !pubmedIDs.isEmpty()) {
                results = getByPubmedIDs(pubmedIDs);
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        } finally {
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    log.error(e.getMessage(), e);
                }
            }
        }
    }
    return results;
}

/**
 * Parses an {@code esearch} result document and returns the values of the
 * {@code Id} elements found below its {@code IdList} element.
 */
private List<String> readPubmedIdsFromSearchResult(InputStream xml)
        throws ParserConfigurationException, SAXException, IOException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setIgnoringComments(true);
    factory.setIgnoringElementContentWhitespace(true);
    // The document may come from the network: never resolve external entities or
    // fetch the external DTD it points to (XXE / SSRF hardening).
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    DocumentBuilder builder = factory.newDocumentBuilder();
    Document inDoc = builder.parse(xml);

    Element xmlRoot = inDoc.getDocumentElement();
    Element idList = XMLUtils.getSingleElement(xmlRoot, "IdList");
    return XMLUtils.getElementValueList(idList, "Id");
}
From source file:com.panet.imeta.core.xml.XMLHandler.java
/** * Load a file into an XML document//ww w.jav a 2s . c om * * @param inputStream * The stream to load a document from * @param systemId * Provide a base for resolving relative URIs. * @param ignoreEntities * Ignores external entities and returns an empty dummy. * @param namespaceAware * support XML namespaces. * @return the Document if all went well, null if an error occured! */ public static final Document loadXMLFile(InputStream inputStream, String systemID, boolean ignoreEntities, boolean namespaceAware) throws KettleXMLException { DocumentBuilderFactory dbf; DocumentBuilder db; Document doc; try { // Check and open XML document // dbf = DocumentBuilderFactory.newInstance(); dbf.setIgnoringComments(true); dbf.setNamespaceAware(namespaceAware); db = dbf.newDocumentBuilder(); // even dbf.setValidating(false) will the parser NOT prevent from // checking the existance of the DTD // thus we need to give the BaseURI (systemID) below to have a // chance to get it // or return empty dummy documents for all external entities // (sources) // if (ignoreEntities) { db.setEntityResolver(new DTDIgnoringEntityResolver()); } try { if (Const.isEmpty(systemID)) { // Normal parsing // doc = db.parse(inputStream); } else { // Do extra verifications // String systemIDwithEndingSlash = systemID.trim(); // make sure we have an ending slash, otherwise the last // part will be ignored // if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) { systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/"); } doc = db.parse(inputStream, systemIDwithEndingSlash); } } catch (FileNotFoundException ef) { throw new KettleXMLException(ef); } finally { if (inputStream != null) inputStream.close(); } return doc; } catch (Exception e) { throw new KettleXMLException("Error reading information from input stream", e); } }
From source file:edu.duke.cabig.c3pr.webservice.integration.StudyImportExportWebServiceTest.java
private Node getSOAPBodyFromXML(String xmlBaseFileName) throws IOException, SAXException, ParserConfigurationException { InputStream is = getResource(null, TESTDATA_PACKAGE + "/" + xmlBaseFileName + ".xml"); String xmlStr = IOUtils.toString(is); xmlStr = xmlStr.replace("${STUDY_ID}", STUDY_ID); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true);//from w w w.j a v a2 s. co m dbf.setIgnoringComments(true); org.w3c.dom.Document doc = dbf.newDocumentBuilder().parse(IOUtils.toInputStream(xmlStr)); IOUtils.closeQuietly(is); return doc.getChildNodes().item(0); }
From source file:com.ibm.bi.dml.conf.DMLConfig.java
/** * Method to parse configuration//from w ww.j a va2 s . c o m * @throws ParserConfigurationException * @throws SAXException * @throws IOException */ private void parseConfig() throws ParserConfigurationException, SAXException, IOException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringComments(true); //ignore XML comments DocumentBuilder builder = factory.newDocumentBuilder(); Document domTree = null; if (config_file_name.startsWith("hdfs:") || config_file_name.startsWith("gpfs:")) // config file from DFS { if (!LocalFileUtils.validateExternalFilename(config_file_name, true)) throw new IOException("Invalid (non-trustworthy) hdfs config filename."); FileSystem DFS = FileSystem.get(ConfigurationManager.getCachedJobConf()); Path configFilePath = new Path(config_file_name); domTree = builder.parse(DFS.open(configFilePath)); } else // config from local file system { if (!LocalFileUtils.validateExternalFilename(config_file_name, false)) throw new IOException("Invalid (non-trustworthy) local config filename."); domTree = builder.parse(config_file_name); } xml_root = domTree.getDocumentElement(); }
From source file:io.fabric8.forge.ipaas.repository.NexusConnectionRepository.java
protected void indexNexus() throws Exception { // must have q parameter so use connector to find all connectors String query = nexusUrl + "?q=connector"; URL url = new URL(query); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true);//from w w w . java 2s .c o m factory.setIgnoringElementContentWhitespace(true); factory.setIgnoringComments(true); DocumentBuilder documentBuilder = factory.newDocumentBuilder(); InputStream is = url.openStream(); Document dom = documentBuilder.parse(is); XPathFactory xpFactory = XPathFactory.newInstance(); XPath exp = xpFactory.newXPath(); NodeList list = (NodeList) exp.evaluate("//classifier[text() = '" + CLASSIFIER + "']", dom, XPathConstants.NODESET); Set<NexusArtifactDto> newArtifacts = new LinkedHashSet<>(); for (int i = 0; i < list.getLength(); i++) { Node node = list.item(i); Node parent = node.getParentNode(); String g = getNodeText(parent.getChildNodes(), "groupId"); String a = getNodeText(parent.getChildNodes(), "artifactId"); String v = getNodeText(parent.getChildNodes(), "version"); String l = getNodeText(parent.getChildNodes(), "artifactLink"); if (g != null & a != null & v != null & l != null) { NexusArtifactDto dto = new NexusArtifactDto(); dto.setGroupId(g); dto.setArtifactId(a); dto.setVersion(v); dto.setArtifactLink(l); System.out.println("Found connector: " + dto.getGroupId() + ":" + dto.getArtifactId() + ":" + dto.getVersion()); // is it a new artifact boolean newArtifact = true; for (NexusArtifactDto existing : indexedArtifacts) { if (existing.getGroupId().equals(dto.getGroupId()) && existing.getArtifactId().equals(dto.getArtifactId()) && existing.getVersion().equals(dto.getVersion())) { newArtifact = false; break; } } if (newArtifact) { newArtifacts.add(dto); } } } // now download the new artifact JARs and look inside to find more details for (NexusArtifactDto dto : newArtifacts) { try { // download using url classloader reader URL jarUrl = new 
URL(dto.getArtifactLink()); String json = loadCamelConnectorJSonSchema(jarUrl); ObjectMapper mapper = new ObjectMapper(); ConnectionCatalogDto cat = mapper.readerFor(ConnectionCatalogDto.class).readValue(json); indexedArtifacts.add(dto); connectors.putIfAbsent(dto, cat); System.out.println("Added connector: " + dto.getGroupId() + ":" + dto.getArtifactId() + ":" + dto.getVersion()); } catch (Exception e) { System.err.println("Error downloading connector JAR " + dto.getArtifactLink() + ". This exception is ignored. " + e.getMessage()); } } IOHelpers.close(is); }
From source file:com.mediaworx.xmlutils.XmlHelper.java
/** * Creates and returns a document builder that is configured with the following options: * <ul>/* w w w . ja v a2s .c o m*/ * <li>don't validate</li> * <li>ignore comments</li> * <li>ignore content whitespace</li> * <li>convert CDATA nodes to text nodes</li> * <li>don't perform namespace processing</li> * <li>ignore DTDs</li> * </ul> * @return the DocumentBuilder * @throws ParserConfigurationException if for some reason the DocumentBuilder used to parse the XML can't be * initialized */ private DocumentBuilder getNonValidatingDocumentBuilder() throws ParserConfigurationException { DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); documentBuilderFactory.setValidating(false); documentBuilderFactory.setIgnoringComments(true); documentBuilderFactory.setIgnoringElementContentWhitespace(true); documentBuilderFactory.setCoalescing(true); documentBuilderFactory.setFeature("http://xml.org/sax/features/namespaces", false); documentBuilderFactory.setFeature("http://xml.org/sax/features/validation", false); documentBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); documentBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); return documentBuilderFactory.newDocumentBuilder(); }
From source file:com.jaspersoft.studio.custom.adapter.controls.DynamicControlComposite.java
/**
 * Looks for the castor mapping file of the data adapter (resolved as a classpath
 * resource relative to the descriptor's class) and, when it contains a
 * {@code class} element whose {@code name} attribute equals the adapter's class
 * name, creates the edit controls from that element's child nodes. Nothing happens
 * when no mapping location is known, the resource is missing, the root element is
 * not {@code mapping}, or no matching {@code class} element exists. Parsing errors
 * are printed and otherwise ignored.
 */
protected void createDynamicControls() {
    String xmlDefinition = getXmlDefinitionLocation();
    if (xmlDefinition == null) {
        return;
    }
    DataAdapter adapter = dataAdapterDescriptor.getDataAdapter();
    InputStream is = dataAdapterDescriptor.getClass().getResourceAsStream("/" + xmlDefinition);
    if (is == null) {
        return;
    }

    try {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);
        dbf.setIgnoringComments(true);
        dbf.setNamespaceAware(false);
        DocumentBuilder builder = dbf.newDocumentBuilder();
        // Answer requests for the castor mapping DTD with an empty document so the
        // parser does not try to download it.
        builder.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                if (systemId.contains("http://castor.org/mapping.dtd")) {
                    return new InputSource(new StringReader(""));
                } else {
                    return null;
                }
            }
        });

        Document document = builder.parse(is);
        Node mapNode = document.getDocumentElement();
        if (!mapNode.getNodeName().equals("mapping")) {
            return;
        }

        NodeList adapterNodes = mapNode.getChildNodes();
        for (int j = 0; j < adapterNodes.getLength(); ++j) {
            Node adapterNode = adapterNodes.item(j);
            if (!adapterNode.getNodeName().equals("class")) {
                continue;
            }
            // A <class> element without a "name" attribute is skipped instead of
            // aborting the whole search with a NullPointerException.
            Node nameAttribute = adapterNode.getAttributes().getNamedItem("name");
            String classAttribute = nameAttribute != null ? nameAttribute.getNodeValue() : null;
            if (classAttribute != null && classAttribute.equals(adapter.getClass().getName())) {
                createDynamicControls(adapterNode.getChildNodes());
                return;
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        // Always release the resource stream, including when no matching <class>
        // element was found.
        try {
            is.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}