Example usage for javax.xml.parsers DocumentBuilderFactory setIgnoringComments

List of usage examples for javax.xml.parsers DocumentBuilderFactory setIgnoringComments

Introduction

On this page you can find usage examples for the method javax.xml.parsers.DocumentBuilderFactory.setIgnoringComments.

Prototype


public void setIgnoringComments(boolean ignoreComments) 

Source Link

Document

Specifies that the parser produced by this code will ignore comments.

Usage

From source file:org.dspace.submit.lookup.PubmedService.java

/**
 * Fetches the PubMed records for the given identifiers from the NCBI efetch
 * web service and converts every returned {@code PubmedArticle} element into
 * a {@code Record}.
 *
 * @param pubmedIDs the PubMed identifiers to fetch; {@code null} or an empty
 *                  list yields an empty result without contacting the service
 * @return the converted records, in the order the articles appear in the response
 * @throws HttpException declared in the signature for callers; not raised
 *                       directly by the code in this method
 * @throws IOException if the request cannot be executed or the response cannot be read
 * @throws ParserConfigurationException if no XML parser can be created
 * @throws SAXException if the response is not well-formed XML
 * @throws RuntimeException if the request URI cannot be built, the service
 *                          answers with a status other than 200, or an article
 *                          cannot be converted
 */
public List<Record> getByPubmedIDs(List<String> pubmedIDs)
        throws HttpException, IOException, ParserConfigurationException, SAXException {
    List<Record> results = new ArrayList<Record>();

    // Nothing to look up: avoid a remote call with an empty "id" parameter.
    if (pubmedIDs == null || pubmedIDs.isEmpty()) {
        return results;
    }

    HttpClient client = new DefaultHttpClient();
    HttpGet method = null;
    try {
        client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 5 * timeout);

        try {
            URIBuilder uriBuilder = new URIBuilder("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi");
            uriBuilder.addParameter("db", "pubmed");
            uriBuilder.addParameter("retmode", "xml");
            uriBuilder.addParameter("rettype", "full");
            uriBuilder.addParameter("id", StringUtils.join(pubmedIDs.iterator(), ","));
            method = new HttpGet(uriBuilder.build());
        } catch (URISyntaxException ex) {
            throw new RuntimeException("Request not sent", ex);
        }

        // Execute the method.
        HttpResponse response = client.execute(method);
        StatusLine statusLine = response.getStatusLine();
        if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
            throw new RuntimeException("WS call failed: " + statusLine);
        }

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setIgnoringComments(true);
        factory.setIgnoringElementContentWhitespace(true);

        DocumentBuilder builder = factory.newDocumentBuilder();
        Document inDoc = builder.parse(response.getEntity().getContent());

        Element xmlRoot = inDoc.getDocumentElement();
        List<Element> pubArticles = XMLUtils.getElementList(xmlRoot, "PubmedArticle");

        for (Element xmlArticle : pubArticles) {
            try {
                results.add(PubmedUtils.convertPubmedDomToRecord(xmlArticle));
            } catch (Exception e) {
                throw new RuntimeException("PubmedID is not valid or not exist: " + e.getMessage(), e);
            }
        }

        return results;
    } finally {
        if (method != null) {
            method.releaseConnection();
        }
        // The client is local to this call, so its connection manager must be
        // shut down here or its resources linger until garbage collection.
        client.getConnectionManager().shutdown();
    }
}

From source file:com.panet.imeta.core.xml.XMLHandler.java

/**
 * Parses the content of a VFS file object into a DOM document. XML comments
 * are dropped by the parser.
 * 
 * @param fileObject
 *            The file whose content is parsed
 * @param systemID
 *            Base for resolving relative URIs. When empty, the document is
 *            parsed without a base URI; otherwise a trailing slash is
 *            appended if it is missing.
 * @param ignoreEntities
 *            When true, a DTDIgnoringEntityResolver is installed so that
 *            external entities are answered by that resolver.
 * @param namespaceAware
 *            Whether the parser should support XML namespaces.
 * @return the parsed Document
 * @throws KettleXMLException
 *             if the file cannot be opened, read or parsed
 */
public static final Document loadXMLFile(FileObject fileObject, String systemID, boolean ignoreEntities,
        boolean namespaceAware) throws KettleXMLException {
    try {
        // Set up the parser
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringComments(true);
        factory.setNamespaceAware(namespaceAware);
        DocumentBuilder parser = factory.newDocumentBuilder();

        // Even a non-validating parser still checks for the existence of the
        // DTD. Either the base URI (systemID) is supplied below so the DTD
        // can be found, or all external entities are answered with empty
        // dummy documents.
        if (ignoreEntities) {
            parser.setEntityResolver(new DTDIgnoringEntityResolver());
        }

        InputStream source = null;
        try {
            if (Const.isEmpty(systemID)) {
                // Plain parsing, no base URI
                source = KettleVFS.getInputStream(fileObject);
                return parser.parse(source);
            }

            // The base URI must end with a slash, otherwise its last part
            // would be ignored during resolution
            String baseUri = systemID.trim();
            if (!(baseUri.endsWith("/") || baseUri.endsWith("\\"))) {
                baseUri = baseUri.concat("/");
            }
            source = KettleVFS.getInputStream(fileObject);
            return parser.parse(source, baseUri);
        } catch (FileNotFoundException ef) {
            throw new KettleXMLException(ef);
        } finally {
            if (source != null) {
                source.close();
            }
        }
    } catch (Exception e) {
        throw new KettleXMLException("Error reading information from file", e);
    }
}

From source file:org.dspace.submit.lookup.PubmedService.java

/**
 * Searches PubMed for the given query and returns the matching records.
 * <p>
 * Unless the {@code remoteservice.demo} property of the submission lookup
 * module is set, the NCBI esearch web service is queried (at most 10 ids are
 * requested). Any failure in that mode is logged and an empty list is
 * returned. In demo mode the id list is read from the local file
 * {@code config/crosswalks/demo/pubmed-search.xml} below {@code dspace.dir},
 * and any failure is rethrown as a {@link RuntimeException}.
 *
 * @param query the search term sent to the service (ignored in demo mode)
 * @return the records for the ids found; empty if the remote lookup failed
 * @throws IOException declared in the signature for callers
 * @throws HttpException declared in the signature for callers
 */
public List<Record> search(String query) throws IOException, HttpException {
    List<Record> results = new ArrayList<>();
    if (!ConfigurationManager.getBooleanProperty(SubmissionLookupService.CFG_MODULE, "remoteservice.demo")) {
        HttpClient client = new DefaultHttpClient();
        HttpGet method = null;
        try {
            client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout);

            URIBuilder uriBuilder = new URIBuilder("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi");
            uriBuilder.addParameter("db", "pubmed");
            uriBuilder.addParameter("datetype", "edat");
            uriBuilder.addParameter("retmax", "10");
            uriBuilder.addParameter("term", query);
            method = new HttpGet(uriBuilder.build());

            // Execute the method.
            HttpResponse response = client.execute(method);
            StatusLine statusLine = response.getStatusLine();
            if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
                throw new RuntimeException("WS call failed: " + statusLine);
            }

            results = lookupRecordsFromIdList(response.getEntity().getContent());
        } catch (Exception e1) {
            // Remote lookups are best effort: log and fall through to the empty result.
            log.error(e1.getMessage(), e1);
        } finally {
            if (method != null) {
                method.releaseConnection();
            }
            // The client is local to this call; release its connection manager.
            client.getConnectionManager().shutdown();
        }
    } else {
        InputStream stream = null;
        try {
            File file = new File(ConfigurationManager.getProperty("dspace.dir")
                    + "/config/crosswalks/demo/pubmed-search.xml");
            stream = new FileInputStream(file);
            results = lookupRecordsFromIdList(stream);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        } finally {
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    log.error(e.getMessage(), e);
                }
            }
        }
    }
    return results;
}

/**
 * Parses a search result document, reads the {@code Id} values below its
 * {@code IdList} element and fetches the corresponding records.
 */
private List<Record> lookupRecordsFromIdList(InputStream xml)
        throws HttpException, IOException, ParserConfigurationException, SAXException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setIgnoringComments(true);
    factory.setIgnoringElementContentWhitespace(true);

    DocumentBuilder builder = factory.newDocumentBuilder();
    Document inDoc = builder.parse(xml);

    Element xmlRoot = inDoc.getDocumentElement();
    Element idList = XMLUtils.getSingleElement(xmlRoot, "IdList");
    List<String> pubmedIDs = XMLUtils.getElementValueList(idList, "Id");
    return getByPubmedIDs(pubmedIDs);
}

From source file:com.panet.imeta.core.xml.XMLHandler.java

/**
 * Parses the content of an input stream into a DOM document. XML comments are
 * dropped by the parser. The stream is always closed before this method
 * returns or throws, including when the parser itself cannot be set up.
 * 
 * @param inputStream
 *            The stream to load a document from; closed by this method
 * @param systemID
 *            Base for resolving relative URIs. When empty, the document is
 *            parsed without a base URI; otherwise a trailing slash is
 *            appended if it is missing.
 * @param ignoreEntities
 *            When true, a DTDIgnoringEntityResolver is installed so that
 *            external entities are answered by that resolver.
 * @param namespaceAware
 *            Whether the parser should support XML namespaces.
 * @return the parsed Document
 * @throws KettleXMLException
 *             if the parser cannot be created or the stream cannot be read
 *             or parsed
 */
public static final Document loadXMLFile(InputStream inputStream, String systemID, boolean ignoreEntities,
        boolean namespaceAware) throws KettleXMLException {
    try {
        try {
            // Check and open XML document
            //
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setIgnoringComments(true);
            dbf.setNamespaceAware(namespaceAware);
            DocumentBuilder db = dbf.newDocumentBuilder();

            // Even a non-validating parser still checks for the existence
            // of the DTD. Either the base URI (systemID) is supplied below
            // so the DTD can be found, or all external entities are
            // answered with empty dummy documents.
            //
            if (ignoreEntities) {
                db.setEntityResolver(new DTDIgnoringEntityResolver());
            }

            if (Const.isEmpty(systemID)) {
                // Normal parsing
                //
                return db.parse(inputStream);
            }

            // Make sure we have an ending slash, otherwise the last part
            // of the base URI will be ignored
            //
            String systemIDwithEndingSlash = systemID.trim();
            if (!systemIDwithEndingSlash.endsWith("/") && !systemIDwithEndingSlash.endsWith("\\")) {
                systemIDwithEndingSlash = systemIDwithEndingSlash.concat("/");
            }
            return db.parse(inputStream, systemIDwithEndingSlash);
        } catch (FileNotFoundException ef) {
            throw new KettleXMLException(ef);
        } finally {
            // The finally block wraps the parser setup too, so the caller's
            // stream is closed even when the DocumentBuilder cannot be built.
            if (inputStream != null) {
                inputStream.close();
            }
        }
    } catch (Exception e) {
        throw new KettleXMLException("Error reading information from input stream", e);
    }
}

From source file:edu.duke.cabig.c3pr.webservice.integration.StudyImportExportWebServiceTest.java

/**
 * Loads the test data file {@code <xmlBaseFileName>.xml} from the test data
 * package, replaces every {@code ${STUDY_ID}} placeholder with the study id
 * and parses the result with a namespace-aware, comment-ignoring parser.
 *
 * @param xmlBaseFileName name of the test data file without the {@code .xml} extension
 * @return the first child node of the parsed document
 * @throws IOException if the resource cannot be read
 * @throws SAXException if the XML is not well-formed
 * @throws ParserConfigurationException if no XML parser can be created
 */
private Node getSOAPBodyFromXML(String xmlBaseFileName)
        throws IOException, SAXException, ParserConfigurationException {
    String xmlStr;
    InputStream is = getResource(null, TESTDATA_PACKAGE + "/" + xmlBaseFileName + ".xml");
    try {
        xmlStr = IOUtils.toString(is);
    } finally {
        // Close as soon as the content is in memory, and also when reading fails.
        IOUtils.closeQuietly(is);
    }
    xmlStr = xmlStr.replace("${STUDY_ID}", STUDY_ID);

    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    dbf.setIgnoringComments(true);
    org.w3c.dom.Document doc = dbf.newDocumentBuilder().parse(IOUtils.toInputStream(xmlStr));
    return doc.getChildNodes().item(0);
}

From source file:com.ibm.bi.dml.conf.DMLConfig.java

/**
 * Parses the configuration file named by {@code config_file_name} and stores
 * the root element of the resulting DOM tree in {@code xml_root}.
 * <p>
 * File names starting with {@code hdfs:} or {@code gpfs:} are read through
 * the distributed file system; all other names are handed to the XML parser
 * directly. In both cases the name is validated first.
 *
 * @throws ParserConfigurationException if no XML parser can be created
 * @throws SAXException if the configuration is not well-formed XML
 * @throws IOException if the file name is rejected by validation or the file cannot be read
 */
private void parseConfig() throws ParserConfigurationException, SAXException, IOException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setIgnoringComments(true); //ignore XML comments
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document domTree = null;
    if (config_file_name.startsWith("hdfs:") || config_file_name.startsWith("gpfs:")) // config file from DFS
    {
        if (!LocalFileUtils.validateExternalFilename(config_file_name, true)) {
            throw new IOException("Invalid (non-trustworthy) hdfs config filename.");
        }
        FileSystem DFS = FileSystem.get(ConfigurationManager.getCachedJobConf());
        Path configFilePath = new Path(config_file_name);
        // This method opens the stream, so it is responsible for closing it.
        java.io.InputStream configStream = DFS.open(configFilePath);
        try {
            domTree = builder.parse(configStream);
        } finally {
            configStream.close();
        }
    } else // config from local file system
    {
        if (!LocalFileUtils.validateExternalFilename(config_file_name, false)) {
            throw new IOException("Invalid (non-trustworthy) local config filename.");
        }
        domTree = builder.parse(config_file_name);
    }

    xml_root = domTree.getDocumentElement();
}

From source file:io.fabric8.forge.ipaas.repository.NexusConnectionRepository.java

/**
 * Queries the Nexus search URL for connector artifacts and indexes those not
 * seen before.
 * <p>
 * The search result is parsed as XML. Every {@code classifier} element whose
 * text equals {@code CLASSIFIER} identifies a candidate; its sibling elements
 * supply group id, artifact id, version and download link. For each candidate
 * not yet in {@code indexedArtifacts}, the connector JSON schema is loaded
 * from the artifact JAR and the artifact is added to {@code indexedArtifacts}
 * and {@code connectors}. A failure for a single artifact is reported on
 * standard error and does not stop the remaining ones.
 *
 * @throws Exception if the search URL cannot be read or its content cannot be parsed
 */
protected void indexNexus() throws Exception {
    // must have q parameter so use connector to find all connectors
    String query = nexusUrl + "?q=connector";
    URL url = new URL(query);

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setIgnoringElementContentWhitespace(true);
    factory.setIgnoringComments(true);

    DocumentBuilder documentBuilder = factory.newDocumentBuilder();

    // The DOM is fully built by parse(), so the stream can be closed right
    // away; the finally block also covers parse failures.
    Document dom;
    InputStream is = url.openStream();
    try {
        dom = documentBuilder.parse(is);
    } finally {
        IOHelpers.close(is);
    }

    XPathFactory xpFactory = XPathFactory.newInstance();
    XPath exp = xpFactory.newXPath();
    NodeList list = (NodeList) exp.evaluate("//classifier[text() = '" + CLASSIFIER + "']", dom,
            XPathConstants.NODESET);

    Set<NexusArtifactDto> newArtifacts = new LinkedHashSet<>();
    for (int i = 0; i < list.getLength(); i++) {
        Node node = list.item(i);
        Node parent = node.getParentNode();

        String g = getNodeText(parent.getChildNodes(), "groupId");
        String a = getNodeText(parent.getChildNodes(), "artifactId");
        String v = getNodeText(parent.getChildNodes(), "version");
        String l = getNodeText(parent.getChildNodes(), "artifactLink");

        if (g != null && a != null && v != null && l != null) {
            NexusArtifactDto dto = new NexusArtifactDto();
            dto.setGroupId(g);
            dto.setArtifactId(a);
            dto.setVersion(v);
            dto.setArtifactLink(l);

            System.out.println("Found connector: " + dto.getGroupId() + ":" + dto.getArtifactId() + ":"
                    + dto.getVersion());

            // is it a new artifact
            boolean newArtifact = true;
            for (NexusArtifactDto existing : indexedArtifacts) {
                if (existing.getGroupId().equals(dto.getGroupId())
                        && existing.getArtifactId().equals(dto.getArtifactId())
                        && existing.getVersion().equals(dto.getVersion())) {
                    newArtifact = false;
                    break;
                }
            }
            if (newArtifact) {
                newArtifacts.add(dto);
            }
        }
    }

    // now download the new artifact JARs and look inside to find more details
    ObjectMapper mapper = new ObjectMapper();
    for (NexusArtifactDto dto : newArtifacts) {
        try {
            // download using url classloader reader
            URL jarUrl = new URL(dto.getArtifactLink());
            String json = loadCamelConnectorJSonSchema(jarUrl);

            ConnectionCatalogDto cat = mapper.readerFor(ConnectionCatalogDto.class).readValue(json);

            indexedArtifacts.add(dto);
            connectors.putIfAbsent(dto, cat);
            System.out.println("Added connector: " + dto.getGroupId() + ":" + dto.getArtifactId() + ":"
                    + dto.getVersion());
        } catch (Exception e) {
            System.err.println("Error downloading connector JAR " + dto.getArtifactLink()
                    + ". This exception is ignored. " + e.getMessage());
        }
    }
}

From source file:com.mediaworx.xmlutils.XmlHelper.java

/**
 * Creates and returns a document builder that is configured with the following options:
 * <ul>/* w  w  w .  ja v a2s .c o  m*/
 *     <li>don't validate</li>
 *     <li>ignore comments</li>
 *     <li>ignore content whitespace</li>
 *     <li>convert CDATA nodes to text nodes</li>
 *     <li>don't perform namespace processing</li>
 *     <li>ignore DTDs</li>
 * </ul>
 * @return the DocumentBuilder
 * @throws ParserConfigurationException if for some reason the DocumentBuilder used to parse the XML can't be
 *                                      initialized
 */
private DocumentBuilder getNonValidatingDocumentBuilder() throws ParserConfigurationException {
    DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    documentBuilderFactory.setValidating(false);
    documentBuilderFactory.setIgnoringComments(true);
    documentBuilderFactory.setIgnoringElementContentWhitespace(true);
    documentBuilderFactory.setCoalescing(true);
    documentBuilderFactory.setFeature("http://xml.org/sax/features/namespaces", false);
    documentBuilderFactory.setFeature("http://xml.org/sax/features/validation", false);
    documentBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    documentBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    return documentBuilderFactory.newDocumentBuilder();
}

From source file:com.jaspersoft.studio.custom.adapter.controls.DynamicControlComposite.java

/**
 * Looks up the XML definition returned by {@code getXmlDefinitionLocation()}
 * as a classpath resource of the data adapter descriptor's class and, if its
 * root element is {@code mapping}, searches for the {@code class} child whose
 * {@code name} attribute equals the class name of the current data adapter.
 * When such a node is found, the controls are created from its child nodes.
 * <p>
 * Nothing happens when no definition location is available or the resource
 * cannot be found. Errors during parsing are printed to standard error. The
 * resource stream is closed in every case.
 */
protected void createDynamicControls() {
    String xmlDefinition = getXmlDefinitionLocation();
    if (xmlDefinition == null) {
        return;
    }
    DataAdapter adapter = dataAdapterDescriptor.getDataAdapter();
    InputStream is = dataAdapterDescriptor.getClass().getResourceAsStream("/" + xmlDefinition);
    if (is == null) {
        return;
    }
    try {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);
        dbf.setIgnoringComments(true);
        dbf.setNamespaceAware(false);
        DocumentBuilder builder = dbf.newDocumentBuilder();
        builder.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                // Answer the castor mapping DTD with an empty document instead of fetching it.
                if (systemId.contains("http://castor.org/mapping.dtd")) {
                    return new InputSource(new StringReader(""));
                } else {
                    return null;
                }
            }
        });

        Document document = builder.parse(is);
        Node mapNode = document.getDocumentElement();
        if (mapNode.getNodeName().equals("mapping")) {
            NodeList adapterNodes = mapNode.getChildNodes();
            for (int j = 0; j < adapterNodes.getLength(); ++j) {
                Node adapterNode = adapterNodes.item(j);
                if (adapterNode.getNodeName().equals("class")) {
                    String classAttribute = adapterNode.getAttributes().getNamedItem("name")
                            .getNodeValue();
                    if (classAttribute != null && classAttribute.equals(adapter.getClass().getName())) {
                        createDynamicControls(adapterNode.getChildNodes());
                        return;
                    }
                }
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        // Close on every path, including when no matching class node exists.
        try {
            is.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}