Usage examples for javax.xml.parsers.DocumentBuilderFactory.setIgnoringComments
public void setIgnoringComments(boolean ignoreComments)
From source file:ubic.gemma.loader.entrez.pubmed.ESearchXMLParser.java
/** * @param is//from w w w. j a v a 2 s .c o m * @return * @throws IOException * @throws ParserConfigurationException * @throws SAXException */ private Document openAndParse(InputStream is) throws IOException, ParserConfigurationException, SAXException { if (is.available() == 0) { throw new IOException("XML stream contains no data."); } DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringComments(true); // factory.setValidating( true ); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(is); return document; }
From source file:ubic.gemma.loader.entrez.pubmed.PubMedXMLParser.java
/**
 * Parses an XML stream (comments ignored, validation off) and extracts the bibliographic references from it.
 *
 * @param is stream positioned at the start of the XML content; must not be null
 * @return whatever {@code extractBibRefs} produces for the parsed document
 * @throws RuntimeException wrapping the underlying {@link IOException} (including the "XML stream contains no
 *         data." case for an empty stream), {@link ParserConfigurationException} or {@link SAXException}
 */
public Collection<BibliographicReference> parse(InputStream is) {
    if (is == null) {
        throw new NullPointerException("is");
    }

    try {
        // InputStream.available() is only an estimate of what can be read without blocking and may be 0 for
        // a stream that still has data. Probe for emptiness by reading one byte and pushing it back.
        java.io.PushbackInputStream probed = new java.io.PushbackInputStream(is, 1);
        int firstByte = probed.read();
        if (firstByte == -1) {
            throw new IOException("XML stream contains no data.");
        }
        probed.unread(firstByte);

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringComments(true);
        factory.setValidating(false);

        // 'builder' is declared outside this method (presumably a field of the parser); it is reassigned here.
        builder = factory.newDocumentBuilder();
        Document document = builder.parse(probed);

        log.debug("done parsing");
        return extractBibRefs(document);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    } catch (SAXException e) {
        throw new RuntimeException(e);
    }
}
From source file:ubic.gemma.loader.entrez.pubmed.XMLUtils.java
/** * @param is/*from ww w . ja v a2 s . c om*/ * @return * @throws IOException * @throws ParserConfigurationException * @throws SAXException */ public static Document openAndParse(InputStream is) throws IOException, ParserConfigurationException, SAXException { if (is.available() == 0) { throw new IOException("XML stream contains no data."); } DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringComments(true); // factory.setValidating( true ); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(is); return document; }
From source file:ubic.gemma.web.services.AbstractGemmaEndpoint.java
/**
 * Parses an XML report (for example one previously saved to disk) into a DOM document, with comments
 * ignored and validation switched off.
 * <p>
 * Parser-configuration and parse failures are logged and rethrown wrapped in a {@link RuntimeException};
 * this method does not return null on failure.
 *
 * @param is stream over the existing XML report
 * @return the parsed XML document
 * @throws IOException if reading from the stream fails
 */
protected Document readReport(InputStream is) throws IOException {
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setIgnoringComments(true);
    builderFactory.setValidating(false);

    try {
        return builderFactory.newDocumentBuilder().parse(is);
    } catch (ParserConfigurationException configProblem) {
        AbstractGemmaEndpoint.log
                .error("Could not configure parser for reading report. Error is: " + configProblem);
        throw new RuntimeException(configProblem);
    } catch (SAXException parseProblem) {
        AbstractGemmaEndpoint.log.error("Could not parse report Error is: " + parseProblem);
        throw new RuntimeException(parseProblem);
    }
}
From source file:uk.co.tfd.symplectic.harvester.XmlAide.java
public static Document loadXmlDocument(String url) throws MalformedURLException, SAXException, IOException, ParserConfigurationException { DocumentBuilderFactory docBuildFactory = DocumentBuilderFactory.newInstance(); docBuildFactory.setIgnoringComments(true); String xmlDoc = null;//from w w w. j a v a 2 s .co m if (url.startsWith("http")) { xmlDoc = ConcurrentHttpFetch.get(url); } else { xmlDoc = WebAide.getURLContents(url); } // doing this fixes makes it work with UTF8 chars return docBuildFactory.newDocumentBuilder().parse(new InputSource(new StringReader(xmlDoc))); }
From source file:uk.me.jeffsutton.pojogen.SimplePOJO.java
public Document parse(BufferedReader xml) throws IOException, SAXException, ParserConfigurationException { String file = ""; try {//from w w w .j av a 2s.co m String str; while ((str = xml.readLine()) != null) { file += str; } } catch (Exception e) { e.printStackTrace(); } file = file.replaceAll("<!DOCTYPE((.|\n|\r)*?)\">", ""); // convert String into InputStream InputStream is = new ByteArrayInputStream(file.getBytes()); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); dbf.setFeature("http://xml.org/sax/features/validation", false); dbf.setNamespaceAware(false); dbf.setIgnoringComments(true); dbf.setValidating(false); dbf.setXIncludeAware(true); return dbf.newDocumentBuilder().parse(is); }