List of usage examples for javax.xml.parsers DocumentBuilderFactory setFeature
/**
 * Sets the feature identified by {@code name} to the given boolean {@code value}.
 *
 * @param name  identifier of the feature, e.g. a feature URI such as
 *              {@code "http://xml.org/sax/features/external-general-entities"}
 * @param value {@code true} to turn the feature on, {@code false} to turn it off
 * @throws ParserConfigurationException declared by the signature; presumably raised when the
 *                                      feature cannot be applied (confirm against the JAXP docs)
 */
public abstract void setFeature(String name, boolean value) throws ParserConfigurationException;
From source file:org.wso2.carbon.wsdl2form.Util.java
/** * Securely parse XML document.// w ww . ja v a 2 s .co m * * @param payload String XML * @return XML Document * @throws ParserConfigurationException error parsing xml * @throws IOException IO error in processing XML document * @throws SAXException SAX error in processing XML document */ private static Document secureParseXML(String payload) throws ParserConfigurationException, IOException, SAXException { Document document; DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(true); dbf.setNamespaceAware(true); // Perform namespace processing dbf.setFeature("http://xml.org/sax/features/namespaces", true); // Validate the document and report validity errors. dbf.setFeature("http://xml.org/sax/features/validation", true); // Build the grammar but do not use the default attributes and attribute types information it contains. dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); // Ignore the external DTD completely. dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); DocumentBuilder db = dbf.newDocumentBuilder(); InputSource inputSource = new InputSource(); inputSource.setCharacterStream(new StringReader(payload)); document = db.parse(inputSource); return document; }
From source file:org.wso2.identity.iml.dsl.mediators.SAMLRequestProcessor.java
private AuthnRequest SAMLRequestParser(String samlRequest) throws ParserConfigurationException, SAXException, ConfigurationException, IOException, UnmarshallingException { IMLUtils.doBootstrap();// www.j a v a 2 s . c om DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); documentBuilderFactory.setNamespaceAware(true); documentBuilderFactory.setExpandEntityReferences(false); documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); org.apache.xerces.util.SecurityManager securityManager = new SecurityManager(); securityManager.setEntityExpansionLimit(0); documentBuilderFactory.setAttribute(SECURITY_MANAGER_PROPERTY, securityManager); DocumentBuilder docBuilder = documentBuilderFactory.newDocumentBuilder(); docBuilder.setEntityResolver((publicId, systemId) -> { throw new SAXException( "SAML request contains invalid elements. Possible XML External Entity " + "(XXE) attack."); }); try (InputStream inputStream = new ByteArrayInputStream( samlRequest.trim().getBytes(StandardCharsets.UTF_8))) { Document document = docBuilder.parse(inputStream); Element element = document.getDocumentElement(); UnmarshallerFactory unmarshallerFactory = Configuration.getUnmarshallerFactory(); Unmarshaller unmarshaller = unmarshallerFactory.getUnmarshaller(element); AuthnRequest authnRequest = (AuthnRequest) unmarshaller.unmarshall(element); return authnRequest; } }
From source file:org.wso2.identity.scenarios.commons.SAML2SSOTestBase.java
private XMLObject unmarshall(String saml2SSOString) throws Exception { doBootstrap();//from w w w . j a va 2 s . com DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); documentBuilderFactory.setNamespaceAware(true); documentBuilderFactory.setXIncludeAware(false); documentBuilderFactory.setExpandEntityReferences(false); try { documentBuilderFactory .setFeature(Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE, false); documentBuilderFactory.setFeature( Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE, false); documentBuilderFactory.setFeature(Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE, false); documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (ParserConfigurationException e) { log.error("Failed to load XML Processor Feature " + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE + " or " + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE + " or " + Constants.LOAD_EXTERNAL_DTD_FEATURE + " or secure-processing."); } org.apache.xerces.util.SecurityManager securityManager = new SecurityManager(); securityManager.setEntityExpansionLimit(ENTITY_EXPANSION_LIMIT); documentBuilderFactory.setAttribute(Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY, securityManager); documentBuilderFactory.setIgnoringComments(true); Document document = getDocument(documentBuilderFactory, saml2SSOString); if (isSignedWithComments(document)) { documentBuilderFactory.setIgnoringComments(false); document = getDocument(documentBuilderFactory, saml2SSOString); } Element element = document.getDocumentElement(); UnmarshallerFactory unmarshallerFactory = Configuration.getUnmarshallerFactory(); Unmarshaller unmarshaller = unmarshallerFactory.getUnmarshaller(element); return unmarshaller.unmarshall(element); }
From source file:org.wso2.mobile.utils.utilities.ZipFileReading.java
/**
 * Parses an XML string into a DOM document using a factory with
 * {@link XMLConstants#FEATURE_SECURE_PROCESSING} enabled.
 *
 * @param xml the XML text to parse
 * @return the parsed document
 * @throws Exception if the factory cannot be configured or the text cannot be parsed
 */
public static Document loadXMLFromString(String xml) throws Exception {
    DocumentBuilderFactory secureFactory = DocumentBuilderFactory.newInstance();
    secureFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);

    DocumentBuilder parser = secureFactory.newDocumentBuilder();
    InputSource source = new InputSource();
    source.setCharacterStream(new StringReader(xml));
    return parser.parse(source);
}
From source file:org.wso2.pc.integration.tests.publisher.processes.AssociateBPMNTestCase.java
/**
 * Reads the registry resource {@code /_system/governance/bpmn/TestProcess1/1.0}, parses it
 * as XML and returns the first element whose tag name is {@code processType}.
 *
 * @param processType tag name to look for under the document root
 * @return the first matching element, or {@code null} when there is none
 * @throws Exception if the registry content cannot be fetched or parsed
 */
private Element getAssociateProcess(String processType) throws Exception {
    WSRegistryServiceClient registryClient = registryProviderUtil.getWSRegistry(automationContext);
    String content = new String(registryClient.getContent("/_system/governance/bpmn/TestProcess1/1.0"));

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    Document parsed = factory.newDocumentBuilder().parse(new InputSource(new StringReader(content)));
    Element root = parsed.getDocumentElement();

    if (root.getElementsByTagName(processType) == null) {
        return null;
    }
    // item(0) yields null when the list is empty, so "no match" also comes back as null.
    return (Element) root.getElementsByTagName(processType).item(0);
}
From source file:org.wso2.pc.integration.tests.publisher.processes.AssociateURLTestCase.java
/**
 * Verifies that the process stored at {@code /_system/governance/processes/TestProcess1/1.0}
 * contains a {@code document} element whose {@code url} child equals {@code GDOC_URL}.
 *
 * @throws Exception if the registry content cannot be fetched or parsed
 */
@Test(groups = { "org.wso2.pc" }, description = "Check associated GDOC document existence", dependsOnMethods = "associateGDoc")
public void checkGDoc() throws Exception {
    RegistryProviderUtil providerUtil = new RegistryProviderUtil();
    WSRegistryServiceClient registryClient = providerUtil.getWSRegistry(automationContext);
    String content = new String(
            registryClient.getContent("/_system/governance/processes/TestProcess1/1.0"));

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    Document parsed = factory.newDocumentBuilder().parse(new InputSource(new StringReader(content)));
    Element root = parsed.getDocumentElement();

    Element documentElement = (Element) root.getElementsByTagName("document").item(0);
    Assert.assertNotNull(documentElement, "No document found");

    String actualUrl = documentElement.getElementsByTagName("url").item(0).getTextContent();
    Assert.assertTrue(actualUrl.equals(GDOC_URL), "Expected GDoc URL not found");
}
From source file:org.wso2.pc.integration.tests.publisher.processes.ImportProcessTestCase.java
/**
 * Reads the registry resource {@code /_system/governance/bpmn/Process1/1.0}, parses it
 * as XML and returns the first element whose tag name is {@code processType}.
 *
 * @param processType tag name to look for under the document root
 * @return the first matching element, or {@code null} when there is none
 * @throws Exception if the registry content cannot be fetched or parsed
 */
private Element getAssociateProcess(String processType) throws Exception {
    WSRegistryServiceClient registryClient = registryProviderUtil.getWSRegistry(automationContext);
    String content = new String(registryClient.getContent("/_system/governance/bpmn/Process1/1.0"));

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    Document parsed = factory.newDocumentBuilder().parse(new InputSource(new StringReader(content)));
    Element root = parsed.getDocumentElement();

    if (root.getElementsByTagName(processType) == null) {
        return null;
    }
    // item(0) yields null when the list is empty, so "no match" also comes back as null.
    return (Element) root.getElementsByTagName(processType).item(0);
}
From source file:org.xdi.service.XmlService.java
private DocumentBuilderFactory creaeDocumentBuilderFactory() throws ParserConfigurationException { DocumentBuilderFactory fty = DocumentBuilderFactory.newInstance(); fty.setNamespaceAware(true);/* w w w.j a v a2 s .c o m*/ // Fix XXE vulnerability fty.setXIncludeAware(false); fty.setExpandEntityReferences(false); fty.setFeature("http://xml.org/sax/features/external-parameter-entities", false); fty.setFeature("http://xml.org/sax/features/external-general-entities", false); fty.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); return fty; }
From source file:org.zaproxy.zap.extension.ascanrulesBeta.CrossDomainScanner.java
/**
 * Prepares the XML tooling used by this scanner: a {@code DocumentBuilder} with external
 * general entities, external parameter entities, external DTD loading and entity-reference
 * expansion disabled, plus a fresh XPath instance.
 *
 * <p>If the parser cannot be configured, the error is logged and the fields assigned
 * after the failing call are left untouched.
 */
@Override
public void init() {
    final String[] disabledFeatures = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    DocumentBuilderFactory hardenedFactory = DocumentBuilderFactory.newInstance();
    try {
        for (String feature : disabledFeatures) {
            hardenedFactory.setFeature(feature, false);
        }
        hardenedFactory.setExpandEntityReferences(false);

        docBuilder = hardenedFactory.newDocumentBuilder();
        xpath = XPathFactory.newInstance().newXPath();
    } catch (ParserConfigurationException e) {
        log.error("Failed to create document builder:", e);
    }
}
From source file:pl.edu.icm.cermine.evaluation.BwmetaFinalMetadataExtractionEvaluation.java
public void evaluate(int mode, NlmIterator iter) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException { javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory.newInstance(); dbf.setValidating(false);/* w w w. j a va 2 s . c om*/ dbf.setFeature("http://xml.org/sax/features/namespaces", false); dbf.setFeature("http://xml.org/sax/features/validation", false); dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); javax.xml.parsers.DocumentBuilder documentBuilder = dbf.newDocumentBuilder(); SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser"); builder.setValidation(false); builder.setFeature("http://xml.org/sax/features/validation", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); List<ComparisonResult> titles = new ArrayList<ComparisonResult>(); List<ComparisonResult> authors = new ArrayList<ComparisonResult>(); List<ComparisonResult> affiliations = new ArrayList<ComparisonResult>(); List<ComparisonResult> authorsAffiliations = new ArrayList<ComparisonResult>(); List<ComparisonResult> emails = new ArrayList<ComparisonResult>(); List<ComparisonResult> authorsEmails = new ArrayList<ComparisonResult>(); List<ComparisonResult> abstracts = new ArrayList<ComparisonResult>(); List<ComparisonResult> keywords = new ArrayList<ComparisonResult>(); List<ComparisonResult> journals = new ArrayList<ComparisonResult>(); List<ComparisonResult> volumes = new ArrayList<ComparisonResult>(); List<ComparisonResult> issues = new ArrayList<ComparisonResult>(); List<ComparisonResult> pageRanges = new ArrayList<ComparisonResult>(); List<ComparisonResult> years = 
new ArrayList<ComparisonResult>(); List<ComparisonResult> dois = new ArrayList<ComparisonResult>(); List<ComparisonResult> references = new ArrayList<ComparisonResult>(); if (mode == 1) { System.out.println("path,cerm_title,cerm_abstract,cerm_keywords," + "cerm_authors,cerm_affs,cerm_autaff,cerm_email,cerm_autemail,cerm_journal,cerm_volume,cerm_issue," + "cerm_pages,cerm_year,cerm_doi,cerm_refs,one"); } int i = 0; for (NlmPair pair : iter) { i++; if (mode == 0) { System.out.println(""); System.out.println(">>>>>>>>> " + i); System.out.println(pair.getExtractedNlm().getPath()); } if (mode == 1) { System.out.print(pair.getOriginalNlm().getPath() + ","); } org.w3c.dom.Document originalNlm; org.w3c.dom.Document extractedNlm; try { originalNlm = documentBuilder.parse(new FileInputStream(pair.getOriginalNlm())); extractedNlm = documentBuilder.parse(new FileInputStream(pair.getExtractedNlm())); } catch (SAXException ex) { i--; continue; } // Document's title MetadataSingle title = new MetadataSingle(originalNlm, "/bwmeta/element/name[not(@type)]", extractedNlm, "/article/front/article-meta//article-title"); title.setComp(EvaluationUtils.swComparator); titles.add(title); title.print(mode, "title"); // Abstract MetadataSingle abstrakt = new MetadataSingle(originalNlm, "/bwmeta/element/description[@type='abstract']", extractedNlm, "/article/front/article-meta/abstract"); abstrakt.setComp(EvaluationUtils.swComparator); abstracts.add(abstrakt); abstrakt.print(mode, "abstract"); // Keywords MetadataList keyword = new MetadataList(originalNlm, "/bwmeta/element/tags[@type='keyword']/tag", extractedNlm, "/article/front/article-meta/kwd-group/kwd"); keywords.add(keyword); keyword.print(mode, "keywords"); // Authors List<Node> expectedAuthorNodes = XMLTools.extractNodes(originalNlm, "/bwmeta/element/contributor[@role='author']"); List<String> expectedAuthors = new ArrayList<String>(); for (Node authorNode : expectedAuthorNodes) { List<Node> names = 
XMLTools.extractChildrenNodesFromNode(authorNode, "name"); if (names.isEmpty()) { continue; } for (Node n : names) { if (n.getAttributes().getNamedItem("type") != null && n.getAttributes().getNamedItem("type").getTextContent().equals("canonical")) { expectedAuthors.add(n.getTextContent()); break; } } } List<Node> extractedAuthorNodes = XMLTools.extractNodes(extractedNlm, "/article/front/article-meta/contrib-group/contrib[@contrib-type='author'][string-name]"); List<String> extractedAuthors = new ArrayList<String>(); for (Node authorNode : extractedAuthorNodes) { List<String> names = XMLTools.extractChildrenTextFromNode(authorNode, "string-name"); if (names.isEmpty()) { continue; } extractedAuthors.add(names.get(0)); } MetadataList author = new MetadataList(expectedAuthors, extractedAuthors); author.setComp(EvaluationUtils.authorComparator); authors.add(author); author.print(mode, "author"); // Affiliations Set<String> expectedAffiliationsSet = Sets .newHashSet(XMLTools.extractTextAsList(originalNlm, "/bwmeta/element/affiliation/text")); Set<String> extractedAffiliationsSet = Sets .newHashSet(XMLTools.extractTextAsList(extractedNlm, "/article/front/article-meta//aff")); List<String> expectedAffiliations = Lists.newArrayList(expectedAffiliationsSet); List<String> extractedAffiliations = Lists.newArrayList(extractedAffiliationsSet); MetadataList affiliation = new MetadataList(expectedAffiliations, extractedAffiliations); affiliation.setComp(EvaluationUtils.cosineComparator()); affiliations.add(affiliation); affiliation.print(mode, "affiliation"); // Author - Affiliation relation MetadataRelation authorAffiliation = new MetadataRelation(); authorAffiliation.setComp1(EvaluationUtils.authorComparator); authorAffiliation.setComp2(EvaluationUtils.cosineComparator()); List<Node> expectedAffiliationNodes = XMLTools.extractNodes(originalNlm, "/bwmeta/element/affiliation"); Map<String, String> expectedAffiliationMap = new HashMap<String, String>(); for (Node 
expectedAffiliationNode : expectedAffiliationNodes) { String id = expectedAffiliationNode.getAttributes().getNamedItem("id").getNodeValue(); String aff = XMLTools.extractChildrenTextFromNode(expectedAffiliationNode, "text").get(0); expectedAffiliationMap.put(id, aff); } List<Node> extractedAffiliationNodes = XMLTools.extractNodes(extractedNlm, "/article/front/article-meta//aff[@id]"); Map<String, String> extractedAffiliationMap = new HashMap<String, String>(); for (Node extractedAffiliationNode : extractedAffiliationNodes) { String id = extractedAffiliationNode.getAttributes().getNamedItem("id").getNodeValue(); String aff = XMLTools.extractTextFromNode(extractedAffiliationNode); extractedAffiliationMap.put(id, aff); } for (Node expectedAuthorNode : expectedAuthorNodes) { String authorName = null; List<Node> names = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "name"); if (names.isEmpty()) { continue; } for (Node n : names) { if (n.getAttributes().getNamedItem("type") != null && n.getAttributes().getNamedItem("type").getTextContent().equals("canonical")) { authorName = n.getTextContent(); break; } } if (authorName == null) continue; List<Node> xrefs = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "affiliation-ref"); for (Node xref : xrefs) { String affId = xref.getAttributes().getNamedItem("ref").getNodeValue(); String aff = expectedAffiliationMap.get(affId); if (aff != null) authorAffiliation.addExpected(new StringRelation(authorName, aff)); } } for (Node extractedAuthorNode : extractedAuthorNodes) { String authorName = extractedAuthors.get(extractedAuthorNodes.indexOf(extractedAuthorNode)); List<Node> xrefs = XMLTools.extractChildrenNodesFromNode(extractedAuthorNode, "xref"); for (Node xref : xrefs) { if ("aff".equals(xref.getAttributes().getNamedItem("ref-type").getNodeValue())) { String affId = xref.getAttributes().getNamedItem("rid").getNodeValue(); for (String id : affId.split(" ")) { String aff = extractedAffiliationMap.get(id); if 
(aff != null) { authorAffiliation.addExtracted(new StringRelation(authorName, aff)); } } } } } authorsAffiliations.add(authorAffiliation); authorAffiliation.print(mode, "author - affiliation"); // Email addresses MetadataList email = new MetadataList(originalNlm, "/bwmeta/element/contributor[@role='author']/attribute[@key='contact-email']/value", extractedNlm, "/article/front/article-meta/contrib-group/contrib[@contrib-type='author']//email"); email.setComp(EvaluationUtils.emailComparator); emails.add(email); email.print(mode, "email"); // Author - Email relations MetadataRelation authorEmail = new MetadataRelation(); authorEmail.setComp1(EvaluationUtils.authorComparator); authorEmail.setComp2(EvaluationUtils.emailComparator); for (Node expectedAuthorNode : expectedAuthorNodes) { String authorName = null; List<Node> names = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "name"); if (names.isEmpty()) { continue; } for (Node n : names) { if (n.getAttributes().getNamedItem("type") != null && n.getAttributes().getNamedItem("type").getTextContent().equals("canonical")) { authorName = n.getTextContent();//.replaceAll("[^a-zA-Z]", ""); break; } } if (authorName == null) continue; List<Node> addresses = XMLTools.extractChildrenNodesFromNode(expectedAuthorNode, "attribute"); for (Node address : addresses) { if ("contact-email".equals(address.getAttributes().getNamedItem("key").getNodeValue())) { String ema = XMLTools.extractChildrenTextFromNode(address, "value").get(0); authorEmail.addExpected(new StringRelation(authorName, ema)); } } } for (Node extractedAuthorNode : extractedAuthorNodes) { String authorName = extractedAuthors.get(extractedAuthorNodes.indexOf(extractedAuthorNode)); for (String emailAddress : XMLTools.extractChildrenTextFromNode(extractedAuthorNode, "email")) { authorEmail.addExtracted(new StringRelation(authorName, emailAddress)); } } authorsEmails.add(authorEmail); authorEmail.print(mode, "author - email"); // Journal title MetadataSingle 
journal = new MetadataSingle(originalNlm, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Journal']/name[@type='canonical']", extractedNlm, "/article/front/journal-meta/journal-title-group/journal-title"); journal.setComp(EvaluationUtils.journalComparator); journals.add(journal); journal.print(mode, "journal title"); // Volume MetadataSingle volume = new MetadataSingle(originalNlm, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Volume']/name[@type='canonical']", extractedNlm, "/article/front/article-meta/volume"); volumes.add(volume); volume.print(mode, "volume"); // Issue MetadataSingle issue = new MetadataSingle(originalNlm, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Number']/name[@type='canonical']", extractedNlm, "/article/front/article-meta/issue"); issues.add(issue); issue.print(mode, "issue"); // Pages range MetadataSingle fPage = new MetadataSingle(originalNlm, "/bwmeta/element/structure/current[@level='bwmeta1.level.hierarchy_Journal_Article']/@position", extractedNlm, "/article/front/article-meta/fpage"); MetadataSingle lPage = new MetadataSingle(originalNlm, "/bwmeta/element/structure/current[@level='bwmeta1.level.hierarchy_Journal_Article']/@position", extractedNlm, "/article/front/article-meta/lpage"); String expRange = fPage.hasExpected() ? fPage.getExpectedValue().replaceAll("-", "--") : ""; String extrRange = fPage.hasExtracted() && lPage.hasExtracted() ? 
fPage.getExtractedValue() + "--" + lPage.getExtractedValue() : ""; MetadataSingle pageRange = new MetadataSingle(expRange, extrRange); pageRanges.add(pageRange); pageRange.print(mode, "pages"); // Publication date List<String> expectedPubDate = XMLTools.extractTextAsList(originalNlm, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Year']/name[@type='canonical']"); expectedPubDate = EvaluationUtils.removeLeadingZerosFromDate(expectedPubDate); List<String> extractedPubDate = XMLTools.extractTextAsList(extractedNlm, "/article/front/article-meta/pub-date"); extractedPubDate = EvaluationUtils.removeLeadingZerosFromDate(extractedPubDate); MetadataSingle year = new MetadataSingle(StringUtils.join(expectedPubDate, "---"), StringUtils.join(extractedPubDate, "---")); year.setComp(EvaluationUtils.yearComparator); years.add(year); year.print(mode, "year"); // DOI MetadataSingle doi = new MetadataSingle(originalNlm, "/bwmeta/element/id[@scheme='bwmeta1.id-class.DOI']/@value", extractedNlm, "/article/front/article-meta/article-id[@pub-id-type='doi']"); dois.add(doi); doi.print(mode, "DOI"); //references List<Node> originalRefNodes = XMLTools.extractNodes(originalNlm, "//relation[@type='reference-to']/attribute[@key='reference-text']/value"); List<Node> extractedRefNodes = XMLTools.extractNodes(extractedNlm, "//ref-list/ref"); List<String> originalRefs = new ArrayList<String>(); List<String> extractedRefs = new ArrayList<String>(); for (Node originalRefNode : originalRefNodes) { originalRefs.add(XMLTools.extractTextFromNode(originalRefNode).trim()); } for (Node extractedRefNode : extractedRefNodes) { extractedRefs.add(XMLTools.extractTextFromNode(extractedRefNode).trim()); } MetadataList refs = new MetadataList(originalRefs, extractedRefs); refs.setComp(EvaluationUtils.cosineComparator(0.6)); references.add(refs); refs.print(mode, "references"); if (mode == 1) { System.out.println("1"); } } if (mode != 1) { System.out.println("==== Summary (" + 
iter.size() + " docs)===="); PrecisionRecall titlePR = new PrecisionRecall().build(titles); titlePR.print("Title"); PrecisionRecall abstractPR = new PrecisionRecall().build(abstracts); abstractPR.print("Abstract"); PrecisionRecall keywordsPR = new PrecisionRecall().build(keywords); keywordsPR.print("Keywords"); PrecisionRecall authorsPR = new PrecisionRecall().build(authors); authorsPR.print("Authors"); PrecisionRecall affiliationsPR = new PrecisionRecall().build(affiliations); affiliationsPR.print("Affiliations"); PrecisionRecall authorsAffiliationsPR = new PrecisionRecall().build(authorsAffiliations); authorsAffiliationsPR.print("Author - affiliation"); PrecisionRecall emailsPR = new PrecisionRecall().build(emails); emailsPR.print("Emails"); PrecisionRecall authorsEmailsPR = new PrecisionRecall().build(authorsEmails); authorsEmailsPR.print("Author - email"); PrecisionRecall journalPR = new PrecisionRecall().build(journals); journalPR.print("Journal"); PrecisionRecall volumePR = new PrecisionRecall().build(volumes); volumePR.print("Volume"); PrecisionRecall issuePR = new PrecisionRecall().build(issues); issuePR.print("Issue"); PrecisionRecall pageRangePR = new PrecisionRecall().build(pageRanges); pageRangePR.print("Pages"); PrecisionRecall yearPR = new PrecisionRecall().build(years); yearPR.print("Year"); PrecisionRecall doiPR = new PrecisionRecall().build(dois); doiPR.print("DOI"); PrecisionRecall refsPR = new PrecisionRecall().build(references); refsPR.print("References"); List<PrecisionRecall> results = Lists.newArrayList(titlePR, authorsPR, affiliationsPR, emailsPR, abstractPR, keywordsPR, journalPR, volumePR, issuePR, pageRangePR, yearPR, doiPR, refsPR); double avgPrecision = 0; double avgRecall = 0; double avgF1 = 0; for (PrecisionRecall result : results) { avgPrecision += result.getPrecision(); avgRecall += result.getRecall(); avgF1 += result.getF1(); } avgPrecision /= results.size(); avgRecall /= results.size(); avgF1 /= results.size(); 
System.out.printf("Average precision\t\t%4.2f\n", 100 * avgPrecision); System.out.printf("Average recall\t\t%4.2f\n", 100 * avgRecall); System.out.printf("Average F1 score\t\t%4.2f\n", 100 * avgF1); } }