Example usage for javax.xml.parsers DocumentBuilderFactory setFeature

List of usage examples for javax.xml.parsers DocumentBuilderFactory setFeature

Introduction

On this page you can find example usages of javax.xml.parsers.DocumentBuilderFactory.setFeature.

Prototype

public abstract void setFeature(String name, boolean value) throws ParserConfigurationException;

Source Link

Document

Sets a feature for this DocumentBuilderFactory and for the DocumentBuilders created by this factory.

Usage

From source file:org.wso2.carbon.wsdl2form.Util.java

/**
 * Securely parse XML document.
 *
 * <p>The payload is parsed with namespace processing and validation switched on. The external DTD
 * subset is not loaded, and external general and parameter entities are not resolved, so an entity
 * declared in the internal DTD subset cannot be used to pull in local files or remote resources
 * (XXE).
 *
 * @param payload String XML
 * @return XML Document
 * @throws ParserConfigurationException error parsing xml
 * @throws IOException                  IO error in processing XML document
 * @throws SAXException                 SAX error in processing XML document
 */
private static Document secureParseXML(String payload)
        throws ParserConfigurationException, IOException, SAXException {

    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(true);
    dbf.setNamespaceAware(true);

    // Perform namespace processing
    dbf.setFeature("http://xml.org/sax/features/namespaces", true);

    // Validate the document and report validity errors.
    dbf.setFeature("http://xml.org/sax/features/validation", true);

    // Build the grammar but do not use the default attributes and attribute types information it contains.
    dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);

    // Ignore the external DTD completely.
    dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    // Skipping the external DTD is not enough: entities declared in the internal subset may still
    // point at external resources, so external entity resolution is switched off as well.
    dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
    dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

    DocumentBuilder db = dbf.newDocumentBuilder();
    InputSource inputSource = new InputSource();
    inputSource.setCharacterStream(new StringReader(payload));
    return db.parse(inputSource);
}

From source file:org.wso2.identity.iml.dsl.mediators.SAMLRequestProcessor.java

/**
 * Parses the given SAML request string into a DOM document and unmarshalls its root element
 * into an {@link AuthnRequest}.
 *
 * <p>The parser is namespace aware, does not expand entity references, has secure processing
 * enabled, uses a security manager with an entity expansion limit of 0, and rejects every
 * entity resolution attempt with a {@link SAXException}.
 *
 * @param samlRequest SAML request XML; it is trimmed and encoded as UTF-8 before parsing
 * @return the unmarshalled authentication request
 */
private AuthnRequest SAMLRequestParser(String samlRequest) throws ParserConfigurationException, SAXException,
        ConfigurationException, IOException, UnmarshallingException {

    IMLUtils.doBootstrap();

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setExpandEntityReferences(false);
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);

    org.apache.xerces.util.SecurityManager expansionGuard = new SecurityManager();
    expansionGuard.setEntityExpansionLimit(0);
    factory.setAttribute(SECURITY_MANAGER_PROPERTY, expansionGuard);

    DocumentBuilder parser = factory.newDocumentBuilder();
    // Any attempt to resolve an entity aborts the parse.
    parser.setEntityResolver((publicId, systemId) -> {
        throw new SAXException(
                "SAML request contains invalid elements. Possible XML External Entity " + "(XXE) attack.");
    });

    byte[] requestBytes = samlRequest.trim().getBytes(StandardCharsets.UTF_8);
    try (InputStream requestStream = new ByteArrayInputStream(requestBytes)) {
        Element root = parser.parse(requestStream).getDocumentElement();
        Unmarshaller unmarshaller = Configuration.getUnmarshallerFactory().getUnmarshaller(root);
        return (AuthnRequest) unmarshaller.unmarshall(root);
    }

}

From source file:org.wso2.identity.scenarios.commons.SAML2SSOTestBase.java

/**
 * Parses a SAML2 SSO message and unmarshalls its root element.
 *
 * <p>The document builder factory is namespace aware, not XInclude aware and does not expand
 * entity references. External general entities, external parameter entities and external DTD
 * loading are switched off and secure processing is switched on. The message is first parsed
 * with comments ignored; if {@code isSignedWithComments} reports true for that document, it is
 * parsed again with comments kept.
 *
 * @param saml2SSOString the SAML2 SSO message as XML text
 * @return the object produced by the unmarshaller registered for the root element
 * @throws Exception if bootstrapping, parsing or unmarshalling fails
 */
private XMLObject unmarshall(String saml2SSOString) throws Exception {

    doBootstrap();
    DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    documentBuilderFactory.setNamespaceAware(true);
    documentBuilderFactory.setXIncludeAware(false);
    documentBuilderFactory.setExpandEntityReferences(false);
    try {
        documentBuilderFactory
                .setFeature(Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE, false);
        documentBuilderFactory.setFeature(
                Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE, false);
        documentBuilderFactory.setFeature(Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE,
                false);
        documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);

    } catch (ParserConfigurationException e) {
        // Parsing continues with whatever features could be applied; the exception is passed
        // to the logger so the rejected feature can be identified from the stack trace.
        log.error("Failed to load XML Processor Feature " + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE + " or "
                + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE + " or " + Constants.LOAD_EXTERNAL_DTD_FEATURE
                + " or secure-processing.", e);
    }

    org.apache.xerces.util.SecurityManager securityManager = new SecurityManager();
    securityManager.setEntityExpansionLimit(ENTITY_EXPANSION_LIMIT);
    documentBuilderFactory.setAttribute(Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY,
            securityManager);

    // First pass drops comments; a second pass keeps them only when the check below asks for it.
    documentBuilderFactory.setIgnoringComments(true);
    Document document = getDocument(documentBuilderFactory, saml2SSOString);
    if (isSignedWithComments(document)) {
        documentBuilderFactory.setIgnoringComments(false);
        document = getDocument(documentBuilderFactory, saml2SSOString);
    }
    Element element = document.getDocumentElement();
    UnmarshallerFactory unmarshallerFactory = Configuration.getUnmarshallerFactory();
    Unmarshaller unmarshaller = unmarshallerFactory.getUnmarshaller(element);
    return unmarshaller.unmarshall(element);
}

From source file:org.wso2.mobile.utils.utilities.ZipFileReading.java

/**
 * Parses XML text into a DOM document using a factory with secure processing enabled.
 *
 * @param xml the XML text to parse
 * @return the parsed document
 * @throws Exception if the parser cannot be configured or the text cannot be parsed
 */
public static Document loadXMLFromString(String xml) throws Exception {

    DocumentBuilderFactory secureFactory = DocumentBuilderFactory.newInstance();
    secureFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    DocumentBuilder parser = secureFactory.newDocumentBuilder();

    InputSource source = new InputSource();
    source.setCharacterStream(new StringReader(xml));
    return parser.parse(source);
}

From source file:org.wso2.pc.integration.tests.publisher.processes.AssociateBPMNTestCase.java

/**
 * Reads the registry resource {@code /_system/governance/bpmn/TestProcess1/1.0}, parses it as
 * XML and returns the first element below the root whose tag name equals {@code processType}.
 *
 * @param processType tag name to look up
 * @return the first matching element, or {@code null} when there is none
 * @throws Exception if the registry content cannot be fetched or parsed
 */
private Element getAssociateProcess(String processType) throws Exception {
    WSRegistryServiceClient registryClient = registryProviderUtil.getWSRegistry(automationContext);
    String resourceXml = new String(registryClient.getContent("/_system/governance/bpmn/TestProcess1/1.0"));

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    DocumentBuilder parser = factory.newDocumentBuilder();
    Element root = parser.parse(new InputSource(new StringReader(resourceXml))).getDocumentElement();

    if (root.getElementsByTagName(processType) == null) {
        return null;
    }
    return (Element) root.getElementsByTagName(processType).item(0);
}

From source file:org.wso2.pc.integration.tests.publisher.processes.AssociateURLTestCase.java

/**
 * Verifies that the registry resource {@code /_system/governance/processes/TestProcess1/1.0}
 * contains a {@code document} element whose first {@code url} child holds {@code GDOC_URL}.
 */
@Test(groups = {
        "org.wso2.pc" }, description = "Check associated GDOC document existence", dependsOnMethods = "associateGDoc")
public void checkGDoc() throws Exception {
    WSRegistryServiceClient registryClient = new RegistryProviderUtil().getWSRegistry(automationContext);
    String processXml = new String(
            registryClient.getContent("/_system/governance/processes/TestProcess1/1.0"));

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    DocumentBuilder parser = factory.newDocumentBuilder();
    Element root = parser.parse(new InputSource(new StringReader(processXml))).getDocumentElement();

    Assert.assertNotNull(root.getElementsByTagName("document").item(0), "No document found");

    Element firstDocument = (Element) root.getElementsByTagName("document").item(0);
    String storedUrl = firstDocument.getElementsByTagName("url").item(0).getTextContent();
    Assert.assertTrue(storedUrl.equals(GDOC_URL), "Expected GDoc URL not found");
}

From source file:org.wso2.pc.integration.tests.publisher.processes.ImportProcessTestCase.java

/**
 * Reads the registry resource {@code /_system/governance/bpmn/Process1/1.0}, parses it as XML
 * and returns the first element below the root whose tag name equals {@code processType}.
 *
 * @param processType tag name to look up
 * @return the first matching element, or {@code null} when there is none
 * @throws Exception if the registry content cannot be fetched or parsed
 */
private Element getAssociateProcess(String processType) throws Exception {
    WSRegistryServiceClient registryClient = registryProviderUtil.getWSRegistry(automationContext);
    String resourceXml = new String(registryClient.getContent("/_system/governance/bpmn/Process1/1.0"));

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    DocumentBuilder parser = factory.newDocumentBuilder();
    Element root = parser.parse(new InputSource(new StringReader(resourceXml))).getDocumentElement();

    if (root.getElementsByTagName(processType) == null) {
        return null;
    }
    return (Element) root.getElementsByTagName(processType).item(0);
}

From source file:org.xdi.service.XmlService.java

/**
 * Creates a namespace-aware {@link DocumentBuilderFactory} configured against XXE: XInclude
 * and entity reference expansion are off, and external parameter entities, external general
 * entities and external DTD loading are disabled.
 *
 * @return the configured factory
 * @throws ParserConfigurationException if the parser rejects one of the features
 */
private DocumentBuilderFactory creaeDocumentBuilderFactory() throws ParserConfigurationException {
    final String[] disabledFeatures = {
            "http://xml.org/sax/features/external-parameter-entities",
            "http://xml.org/sax/features/external-general-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd" };

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);

    // Fix XXE vulnerability
    factory.setXIncludeAware(false);
    factory.setExpandEntityReferences(false);
    for (String feature : disabledFeatures) {
        factory.setFeature(feature, false);
    }
    return factory;
}

From source file:org.zaproxy.zap.extension.ascanrulesBeta.CrossDomainScanner.java

/**
 * Creates the document builder and XPath instance used by this scanner. The builder's factory
 * has external general entities, external parameter entities, external DTD loading and entity
 * reference expansion disabled. If the parser cannot be configured, the failure is logged and
 * neither field is assigned.
 */
@Override
public void init() {
    final String[] disabledFeatures = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd" };

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    try {
        for (String feature : disabledFeatures) {
            factory.setFeature(feature, false);
        }
        factory.setExpandEntityReferences(false);
        docBuilder = factory.newDocumentBuilder();
        xpath = XPathFactory.newInstance().newXPath();
    } catch (ParserConfigurationException e) {
        log.error("Failed to create document builder:", e);
    }
}

From source file:pl.edu.icm.cermine.evaluation.BwmetaFinalMetadataExtractionEvaluation.java

/**
 * Compares, for every pair supplied by {@code iter}, metadata read from the original document
 * (queried with {@code /bwmeta/...} XPath expressions) with metadata read from the extracted
 * document (queried with {@code /article/...} expressions) and prints the outcome to standard
 * output.
 *
 * <p>Output depends on {@code mode}:
 * <ul>
 * <li>{@code 1}: a CSV header line is printed first; every pair starts a line with the path
 * of the original file and ends it with {@code 1}. No summary is printed.</li>
 * <li>{@code 0}: every pair is preceded by a running counter and the path of the extracted
 * file.</li>
 * <li>any value other than {@code 1}: a summary with per-field precision/recall and the
 * averages over the fields listed in {@code results} is printed at the end.</li>
 * </ul>
 *
 * <p>A pair whose original or extracted file fails to parse with a {@link SAXException} is
 * skipped. Each input file is closed as soon as it has been parsed.
 *
 * @param mode output mode, see above; it is also forwarded to the {@code print} methods of
 *             the metadata objects
 * @param iter source of original/extracted file pairs
 */
public void evaluate(int mode, NlmIterator iter)
        throws AnalysisException, IOException, TransformationException, ParserConfigurationException,
        SAXException, JDOMException, XPathExpressionException, TransformerException {

    // Non-validating parser that loads neither the DTD grammar nor the external DTD.
    javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory.newInstance();
    dbf.setValidating(false);
    dbf.setFeature("http://xml.org/sax/features/namespaces", false);
    dbf.setFeature("http://xml.org/sax/features/validation", false);
    dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    javax.xml.parsers.DocumentBuilder documentBuilder = dbf.newDocumentBuilder();

    List<ComparisonResult> titles = new ArrayList<ComparisonResult>();
    List<ComparisonResult> authors = new ArrayList<ComparisonResult>();
    List<ComparisonResult> affiliations = new ArrayList<ComparisonResult>();
    List<ComparisonResult> authorsAffiliations = new ArrayList<ComparisonResult>();
    List<ComparisonResult> emails = new ArrayList<ComparisonResult>();
    List<ComparisonResult> authorsEmails = new ArrayList<ComparisonResult>();
    List<ComparisonResult> abstracts = new ArrayList<ComparisonResult>();
    List<ComparisonResult> keywords = new ArrayList<ComparisonResult>();
    List<ComparisonResult> journals = new ArrayList<ComparisonResult>();
    List<ComparisonResult> volumes = new ArrayList<ComparisonResult>();
    List<ComparisonResult> issues = new ArrayList<ComparisonResult>();
    List<ComparisonResult> pageRanges = new ArrayList<ComparisonResult>();
    List<ComparisonResult> years = new ArrayList<ComparisonResult>();
    List<ComparisonResult> dois = new ArrayList<ComparisonResult>();
    List<ComparisonResult> references = new ArrayList<ComparisonResult>();

    if (mode == 1) {
        System.out.println("path,cerm_title,cerm_abstract,cerm_keywords,"
                + "cerm_authors,cerm_affs,cerm_autaff,cerm_email,cerm_autemail,cerm_journal,cerm_volume,cerm_issue,"
                + "cerm_pages,cerm_year,cerm_doi,cerm_refs,one");
    }

    int i = 0;
    for (NlmPair pair : iter) {
        i++;
        if (mode == 0) {
            System.out.println("");
            System.out.println(">>>>>>>>> " + i);
            System.out.println(pair.getExtractedNlm().getPath());
        }
        if (mode == 1) {
            System.out.print(pair.getOriginalNlm().getPath() + ",");
        }

        org.w3c.dom.Document originalNlm;
        org.w3c.dom.Document extractedNlm;
        try {
            originalNlm = parseAndClose(documentBuilder, pair.getOriginalNlm());
            extractedNlm = parseAndClose(documentBuilder, pair.getExtractedNlm());
        } catch (SAXException ex) {
            // Unparseable pair: undo the counter increment and move on to the next pair.
            i--;
            continue;
        }

        // Document's title
        MetadataSingle title = new MetadataSingle(originalNlm, "/bwmeta/element/name[not(@type)]", extractedNlm,
                "/article/front/article-meta//article-title");
        title.setComp(EvaluationUtils.swComparator);
        titles.add(title);
        title.print(mode, "title");

        // Abstract
        MetadataSingle abstrakt = new MetadataSingle(originalNlm,
                "/bwmeta/element/description[@type='abstract']", extractedNlm,
                "/article/front/article-meta/abstract");
        abstrakt.setComp(EvaluationUtils.swComparator);
        abstracts.add(abstrakt);
        abstrakt.print(mode, "abstract");

        // Keywords
        MetadataList keyword = new MetadataList(originalNlm, "/bwmeta/element/tags[@type='keyword']/tag",
                extractedNlm, "/article/front/article-meta/kwd-group/kwd");
        keywords.add(keyword);
        keyword.print(mode, "keywords");

        // Authors
        List<Node> expectedAuthorNodes = XMLTools.extractNodes(originalNlm,
                "/bwmeta/element/contributor[@role='author']");

        // Expected author = text of the first name child whose type attribute is "canonical".
        List<String> expectedAuthors = new ArrayList<String>();
        for (Node authorNode : expectedAuthorNodes) {
            List<Node> names = XMLTools.extractChildrenNodesFromNode(authorNode, "name");
            if (names.isEmpty()) {
                continue;
            }
            for (Node n : names) {
                if (n.getAttributes().getNamedItem("type") != null
                        && n.getAttributes().getNamedItem("type").getTextContent().equals("canonical")) {
                    expectedAuthors.add(n.getTextContent());
                    break;
                }
            }
        }

        List<Node> extractedAuthorNodes = XMLTools.extractNodes(extractedNlm,
                "/article/front/article-meta/contrib-group/contrib[@contrib-type='author'][string-name]");

        // Extracted author = first string-name text of each contrib node.
        List<String> extractedAuthors = new ArrayList<String>();
        for (Node authorNode : extractedAuthorNodes) {
            List<String> names = XMLTools.extractChildrenTextFromNode(authorNode, "string-name");
            if (names.isEmpty()) {
                continue;
            }
            extractedAuthors.add(names.get(0));
        }

        MetadataList author = new MetadataList(expectedAuthors, extractedAuthors);
        author.setComp(EvaluationUtils.authorComparator);
        authors.add(author);
        author.print(mode, "author");

        // Affiliations (passed through a set, so duplicates are dropped)
        Set<String> expectedAffiliationsSet = Sets
                .newHashSet(XMLTools.extractTextAsList(originalNlm, "/bwmeta/element/affiliation/text"));
        Set<String> extractedAffiliationsSet = Sets
                .newHashSet(XMLTools.extractTextAsList(extractedNlm, "/article/front/article-meta//aff"));
        List<String> expectedAffiliations = Lists.newArrayList(expectedAffiliationsSet);
        List<String> extractedAffiliations = Lists.newArrayList(extractedAffiliationsSet);
        MetadataList affiliation = new MetadataList(expectedAffiliations, extractedAffiliations);
        affiliation.setComp(EvaluationUtils.cosineComparator());
        affiliations.add(affiliation);
        affiliation.print(mode, "affiliation");

        // Author - Affiliation relation
        MetadataRelation authorAffiliation = new MetadataRelation();
        authorAffiliation.setComp1(EvaluationUtils.authorComparator);
        authorAffiliation.setComp2(EvaluationUtils.cosineComparator());

        // id -> affiliation text, for the original document
        List<Node> expectedAffiliationNodes = XMLTools.extractNodes(originalNlm, "/bwmeta/element/affiliation");
        Map<String, String> expectedAffiliationMap = new HashMap<String, String>();
        for (Node expectedAffiliationNode : expectedAffiliationNodes) {
            String id = expectedAffiliationNode.getAttributes().getNamedItem("id").getNodeValue();
            String aff = XMLTools.extractChildrenTextFromNode(expectedAffiliationNode, "text").get(0);
            expectedAffiliationMap.put(id, aff);
        }

        // id -> affiliation text, for the extracted document
        List<Node> extractedAffiliationNodes = XMLTools.extractNodes(extractedNlm,
                "/article/front/article-meta//aff[@id]");
        Map<String, String> extractedAffiliationMap = new HashMap<String, String>();
        for (Node extractedAffiliationNode : extractedAffiliationNodes) {
            String id = extractedAffiliationNode.getAttributes().getNamedItem("id").getNodeValue();
            String aff = XMLTools.extractTextFromNode(extractedAffiliationNode);
            extractedAffiliationMap.put(id, aff);
        }

        for (Node expectedAuthorNode : expectedAuthorNodes) {
            String authorName = null;

            List<Node> names = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "name");
            if (names.isEmpty()) {
                continue;
            }
            for (Node n : names) {
                if (n.getAttributes().getNamedItem("type") != null
                        && n.getAttributes().getNamedItem("type").getTextContent().equals("canonical")) {
                    authorName = n.getTextContent();
                    break;
                }
            }

            if (authorName == null) {
                continue;
            }

            List<Node> xrefs = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "affiliation-ref");
            for (Node xref : xrefs) {
                String affId = xref.getAttributes().getNamedItem("ref").getNodeValue();
                String aff = expectedAffiliationMap.get(affId);
                if (aff != null) {
                    authorAffiliation.addExpected(new StringRelation(authorName, aff));
                }
            }
        }

        for (Node extractedAuthorNode : extractedAuthorNodes) {
            // Looks the name up by the node's position; this relies on extractedAuthors
            // holding one entry per extracted author node.
            String authorName = extractedAuthors.get(extractedAuthorNodes.indexOf(extractedAuthorNode));
            List<Node> xrefs = XMLTools.extractChildrenNodesFromNode(extractedAuthorNode, "xref");
            for (Node xref : xrefs) {
                if ("aff".equals(xref.getAttributes().getNamedItem("ref-type").getNodeValue())) {
                    String affId = xref.getAttributes().getNamedItem("rid").getNodeValue();
                    // rid may hold several space-separated affiliation ids
                    for (String id : affId.split(" ")) {
                        String aff = extractedAffiliationMap.get(id);
                        if (aff != null) {
                            authorAffiliation.addExtracted(new StringRelation(authorName, aff));
                        }
                    }
                }
            }
        }

        authorsAffiliations.add(authorAffiliation);
        authorAffiliation.print(mode, "author - affiliation");

        // Email addresses
        MetadataList email = new MetadataList(originalNlm,
                "/bwmeta/element/contributor[@role='author']/attribute[@key='contact-email']/value",
                extractedNlm,
                "/article/front/article-meta/contrib-group/contrib[@contrib-type='author']//email");
        email.setComp(EvaluationUtils.emailComparator);
        emails.add(email);
        email.print(mode, "email");

        // Author - Email relations
        MetadataRelation authorEmail = new MetadataRelation();
        authorEmail.setComp1(EvaluationUtils.authorComparator);
        authorEmail.setComp2(EvaluationUtils.emailComparator);

        for (Node expectedAuthorNode : expectedAuthorNodes) {
            String authorName = null;

            List<Node> names = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "name");
            if (names.isEmpty()) {
                continue;
            }
            for (Node n : names) {
                if (n.getAttributes().getNamedItem("type") != null
                        && n.getAttributes().getNamedItem("type").getTextContent().equals("canonical")) {
                    authorName = n.getTextContent();
                    break;
                }
            }

            if (authorName == null) {
                continue;
            }

            List<Node> addresses = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "attribute");
            for (Node address : addresses) {
                if ("contact-email".equals(address.getAttributes().getNamedItem("key").getNodeValue())) {
                    String ema = XMLTools.extractChildrenTextFromNode(address, "value").get(0);
                    authorEmail.addExpected(new StringRelation(authorName, ema));
                }
            }
        }
        for (Node extractedAuthorNode : extractedAuthorNodes) {
            String authorName = extractedAuthors.get(extractedAuthorNodes.indexOf(extractedAuthorNode));

            for (String emailAddress : XMLTools.extractChildrenTextFromNode(extractedAuthorNode, "email")) {
                authorEmail.addExtracted(new StringRelation(authorName, emailAddress));
            }
        }
        authorsEmails.add(authorEmail);
        authorEmail.print(mode, "author - email");

        // Journal title
        MetadataSingle journal = new MetadataSingle(originalNlm,
                "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Journal']/name[@type='canonical']",
                extractedNlm, "/article/front/journal-meta/journal-title-group/journal-title");
        journal.setComp(EvaluationUtils.journalComparator);
        journals.add(journal);
        journal.print(mode, "journal title");

        // Volume
        MetadataSingle volume = new MetadataSingle(originalNlm,
                "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Volume']/name[@type='canonical']",
                extractedNlm, "/article/front/article-meta/volume");
        volumes.add(volume);
        volume.print(mode, "volume");

        // Issue
        MetadataSingle issue = new MetadataSingle(originalNlm,
                "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Number']/name[@type='canonical']",
                extractedNlm, "/article/front/article-meta/issue");
        issues.add(issue);
        issue.print(mode, "issue");

        // Pages range: the expected side is a single "position" attribute whose "-" is
        // rewritten to "--"; the extracted side is assembled as fpage + "--" + lpage.
        MetadataSingle fPage = new MetadataSingle(originalNlm,
                "/bwmeta/element/structure/current[@level='bwmeta1.level.hierarchy_Journal_Article']/@position",
                extractedNlm, "/article/front/article-meta/fpage");
        MetadataSingle lPage = new MetadataSingle(originalNlm,
                "/bwmeta/element/structure/current[@level='bwmeta1.level.hierarchy_Journal_Article']/@position",
                extractedNlm, "/article/front/article-meta/lpage");
        String expRange = fPage.hasExpected() ? fPage.getExpectedValue().replaceAll("-", "--") : "";

        String extrRange = fPage.hasExtracted() && lPage.hasExtracted()
                ? fPage.getExtractedValue() + "--" + lPage.getExtractedValue()
                : "";
        MetadataSingle pageRange = new MetadataSingle(expRange, extrRange);
        pageRanges.add(pageRange);
        pageRange.print(mode, "pages");

        // Publication date
        List<String> expectedPubDate = XMLTools.extractTextAsList(originalNlm,
                "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Year']/name[@type='canonical']");
        expectedPubDate = EvaluationUtils.removeLeadingZerosFromDate(expectedPubDate);
        List<String> extractedPubDate = XMLTools.extractTextAsList(extractedNlm,
                "/article/front/article-meta/pub-date");
        extractedPubDate = EvaluationUtils.removeLeadingZerosFromDate(extractedPubDate);

        MetadataSingle year = new MetadataSingle(StringUtils.join(expectedPubDate, "---"),
                StringUtils.join(extractedPubDate, "---"));
        year.setComp(EvaluationUtils.yearComparator);
        years.add(year);
        year.print(mode, "year");

        // DOI
        MetadataSingle doi = new MetadataSingle(originalNlm,
                "/bwmeta/element/id[@scheme='bwmeta1.id-class.DOI']/@value", extractedNlm,
                "/article/front/article-meta/article-id[@pub-id-type='doi']");
        dois.add(doi);
        doi.print(mode, "DOI");

        // References
        List<Node> originalRefNodes = XMLTools.extractNodes(originalNlm,
                "//relation[@type='reference-to']/attribute[@key='reference-text']/value");
        List<Node> extractedRefNodes = XMLTools.extractNodes(extractedNlm, "//ref-list/ref");

        List<String> originalRefs = new ArrayList<String>();
        List<String> extractedRefs = new ArrayList<String>();
        for (Node originalRefNode : originalRefNodes) {
            originalRefs.add(XMLTools.extractTextFromNode(originalRefNode).trim());
        }
        for (Node extractedRefNode : extractedRefNodes) {
            extractedRefs.add(XMLTools.extractTextFromNode(extractedRefNode).trim());
        }

        MetadataList refs = new MetadataList(originalRefs, extractedRefs);
        refs.setComp(EvaluationUtils.cosineComparator(0.6));

        references.add(refs);
        refs.print(mode, "references");

        if (mode == 1) {
            // Closes the CSV line with the constant "one" column.
            System.out.println("1");
        }
    }

    if (mode != 1) {
        System.out.println("==== Summary (" + iter.size() + " docs)====");

        PrecisionRecall titlePR = new PrecisionRecall().build(titles);
        titlePR.print("Title");

        PrecisionRecall abstractPR = new PrecisionRecall().build(abstracts);
        abstractPR.print("Abstract");

        PrecisionRecall keywordsPR = new PrecisionRecall().build(keywords);
        keywordsPR.print("Keywords");

        PrecisionRecall authorsPR = new PrecisionRecall().build(authors);
        authorsPR.print("Authors");

        PrecisionRecall affiliationsPR = new PrecisionRecall().build(affiliations);
        affiliationsPR.print("Affiliations");

        PrecisionRecall authorsAffiliationsPR = new PrecisionRecall().build(authorsAffiliations);
        authorsAffiliationsPR.print("Author - affiliation");

        PrecisionRecall emailsPR = new PrecisionRecall().build(emails);
        emailsPR.print("Emails");

        PrecisionRecall authorsEmailsPR = new PrecisionRecall().build(authorsEmails);
        authorsEmailsPR.print("Author - email");

        PrecisionRecall journalPR = new PrecisionRecall().build(journals);
        journalPR.print("Journal");

        PrecisionRecall volumePR = new PrecisionRecall().build(volumes);
        volumePR.print("Volume");

        PrecisionRecall issuePR = new PrecisionRecall().build(issues);
        issuePR.print("Issue");

        PrecisionRecall pageRangePR = new PrecisionRecall().build(pageRanges);
        pageRangePR.print("Pages");

        PrecisionRecall yearPR = new PrecisionRecall().build(years);
        yearPR.print("Year");

        PrecisionRecall doiPR = new PrecisionRecall().build(dois);
        doiPR.print("DOI");

        PrecisionRecall refsPR = new PrecisionRecall().build(references);
        refsPR.print("References");

        // The two relation metrics (author-affiliation, author-email) are printed above but
        // are not part of the averages.
        List<PrecisionRecall> results = Lists.newArrayList(titlePR, authorsPR, affiliationsPR, emailsPR,
                abstractPR, keywordsPR, journalPR, volumePR, issuePR, pageRangePR, yearPR, doiPR, refsPR);

        double avgPrecision = 0;
        double avgRecall = 0;
        double avgF1 = 0;
        for (PrecisionRecall result : results) {
            avgPrecision += result.getPrecision();
            avgRecall += result.getRecall();
            avgF1 += result.getF1();
        }
        avgPrecision /= results.size();
        avgRecall /= results.size();
        avgF1 /= results.size();

        System.out.printf("Average precision\t\t%4.2f\n", 100 * avgPrecision);
        System.out.printf("Average recall\t\t%4.2f\n", 100 * avgRecall);
        System.out.printf("Average F1 score\t\t%4.2f\n", 100 * avgF1);
    }
}

/**
 * Parses {@code file} into a DOM document and closes the underlying stream whether or not
 * parsing succeeds.
 */
private static org.w3c.dom.Document parseAndClose(javax.xml.parsers.DocumentBuilder documentBuilder,
        java.io.File file) throws IOException, SAXException {
    FileInputStream input = new FileInputStream(file);
    try {
        return documentBuilder.parse(input);
    } finally {
        try {
            input.close();
        } catch (IOException ignored) {
            // The stream was only read from; a failed close must not mask the parse outcome.
        }
    }
}