List of usage examples for org.dom4j Node selectSingleNode
Node selectSingleNode(String xpathExpression);
selectSingleNode evaluates an XPath expression against this node and returns the result as a single Node instance.
From source file:SolrUpdate.java
License:Apache License
public void processUrl() throws Exception { String jv, jn, ji, jd, jm, jy, jsp, authorfull, doi, epday, epmonth, epyear, epubsum, epubsum2 = ""; jv = jn = ji = jd = jm = jy = jsp = authorfull = doi = epday = epmonth = epyear = epubsum = epubsum2 = ""; SAXReader reader = new SAXReader(); SAXReader reader2 = new SAXReader(); Document document = null;// w w w.j a v a 2s.c o m String mytitle, myabstract, myyear, myfullname = ""; Element journalname, journalyear, journalmonth, journalday, journalvolume, journalissue, journalpagestart, epubday, epubmonth, epubyear, pubdoi; int mypmid; List<String> mylauthors = new ArrayList<String>(); List<String> myfauthors = new ArrayList<String>(); List<String> myfnames = new ArrayList<String>(); //PubMed String pubmedlist = ""; Iterator iditer = publications.iterator(); while (iditer.hasNext()) { int currpmid = ((Publication) iditer.next()).getPmid(); if (pubmedlist.length() < 1) { pubmedlist += currpmid; } else { pubmedlist += "," + currpmid; } } String url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" + pubmedlist + "&retmax=200&retmode=xml&rettype=abstract"; Document pubdoc = reader2.read(url); @SuppressWarnings("unchecked") List<Node> thelist = pubdoc.selectNodes("//PubmedArticle| //PubmedBookArticle"); Element abstractnode, titlenode, yearsnode, pmidnode; @SuppressWarnings("rawtypes") List firstnamenode; @SuppressWarnings("rawtypes") List lastnamenode; for (Node currnode : thelist) { mylauthors = new ArrayList<String>(); myfauthors = new ArrayList<String>(); myfnames = new ArrayList<String>(); epubsum = epubsum2 = authorfull = ""; titlenode = (Element) currnode.selectSingleNode(".//ArticleTitle | .//BookTitle"); yearsnode = (Element) currnode .selectSingleNode(".//PubDate/Year | .//DateCompleted/Year | .//DateCreated/Year"); journalname = (Element) currnode.selectSingleNode(".//Journal/Title"); journalyear = (Element) currnode.selectSingleNode(".//PubDate/Year"); journalmonth = (Element) 
currnode.selectSingleNode(".//PubDate/Month"); journalday = (Element) currnode.selectSingleNode(".//PubDate/Day"); journalvolume = (Element) currnode.selectSingleNode(".//JournalIssue/Volume"); journalissue = (Element) currnode.selectSingleNode(".//JournalIssue/Issue"); journalpagestart = (Element) currnode.selectSingleNode(".//Pagination/MedlinePgn"); epubday = (Element) currnode.selectSingleNode( ".//PubMedPubDate[@PubStatus='aheadofprint']/Day | .//PubMedPubDate[@PubStatus='epublish']/Day "); epubmonth = (Element) currnode.selectSingleNode( ".//PubMedPubDate[@PubStatus='aheadofprint']/Month | .//PubMedPubDate[@PubStatus='epublish']/Month"); epubyear = (Element) currnode.selectSingleNode( ".//PubMedPubDate[@PubStatus='aheadofprint']/Year | .//PubMedPubDate[@PubStatus='epublish']/Year"); pubdoi = (Element) currnode.selectSingleNode(".//ArticleId[@IdType='doi']"); firstnamenode = currnode.selectNodes(".//ForeName"); lastnamenode = currnode.selectNodes(".//LastName"); abstractnode = (Element) currnode.selectSingleNode(".//Abstract/AbstractText[1]"); pmidnode = (Element) currnode.selectSingleNode(".//PMID"); myfnames = new ArrayList<String>(); @SuppressWarnings("rawtypes") Iterator fiter = firstnamenode.iterator(); @SuppressWarnings("rawtypes") Iterator liter = lastnamenode.iterator(); if (journalname != null) { jn = journalname.getText(); } if (journalvolume != null) { jv = journalvolume.getText(); } if (journalissue != null) { ji = journalissue.getText(); } if (journalmonth != null) { jm = journalmonth.getText(); } if (journalyear != null) { jy = journalyear.getText(); } if (journalpagestart != null) { jsp = journalpagestart.getText(); } if (journalday != null) { jd = journalday.getText(); } if (epubday != null) { epday = epubday.getText(); } if (epubmonth != null) { epmonth = epubmonth.getText(); } if (epubyear != null) { epyear = epubyear.getText(); } if (pubdoi != null) { doi = "doi: " + pubdoi.getText(); } if (jv.length() > 0) { epubsum2 += jv; } if 
(ji.length() > 0) { epubsum2 += "(" + ji + ")" + ":"; } if (jsp.length() > 0) { epubsum2 += jsp + "."; } if (epmonth.length() < 1 && epyear.length() < 1 && epday.length() < 1) { epubsum = "[Epub ahead of print]"; } else if (epyear.length() > 0) { epubsum = "Epub " + epyear + " " + epmonth + " " + epday; } else { epubsum = ""; } mytitle = titlenode.getText(); myyear = yearsnode.getText(); mypmid = Integer.valueOf(pmidnode.getText()); while (fiter.hasNext()) { Element fname = (Element) fiter.next(); Element lname = (Element) liter.next(); myfauthors.add(fname.getText()); mylauthors.add(lname.getText()); myfullname = fname.getText() + " " + lname.getText(); myfnames.add(myfullname); if (fiter.hasNext()) { authorfull = authorfull + myfullname + ", "; } else { authorfull = authorfull + myfullname; } } if (abstractnode != null) { myabstract = abstractnode.getText(); } else { myabstract = "NO ABSTRACT FOUND."; } publications.add(new Publication(mytitle, myabstract, myyear, myfauthors, mylauthors, myfnames, jv, jn, jy, jm, jd, jsp, ji, epday, epmonth, epyear, doi, epubsum, epubsum2, authorfull, mypmid)); } }
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
protected void populateAssayResultsFromXML(PCAssay assay, Node assayDescriptionNode) { List<Node> assayResultNodes = assayDescriptionNode .selectNodes("../../../PC-AssaySubmit_data/PC-AssayResults"); if (assayResultNodes.size() == 0) return;//from w w w.j a v a 2 s . c o m for (Node resultNode : assayResultNodes) { PCAssayResult result = new PCAssayResult(); String val = resultNode.selectSingleNode("PC-AssayResults_sid").valueOf("text()"); result.setSID(Long.parseLong(val)); val = resultNode.selectSingleNode("PC-AssayResults_outcome").valueOf("@value"); val = val.substring(0, 1).toUpperCase() + val.substring(1); result.setOutcome(val); val = resultNode.selectSingleNode("PC-AssayResults_rank").valueOf("text()"); result.setRankScore(Integer.parseInt(val)); List<Node> assayDataNodes = resultNode.selectNodes("PC-AssayResults_data/PC-AssayData"); List<String> all = GrowthList.decorate(new ArrayList<String>(assay.getColumns().size() - 2)); result.setAllValues(all); for (Node node : assayDataNodes) { val = node.valueOf("PC-AssayData_tid/text()"); int index = Integer.parseInt(val) - 1; val = node.selectSingleNode(".//*[starts-with(name(),'PC-AssayData_value_')]").getText(); all.set(index, val); } assay.getResults().add(result); } }
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
protected PCAssay populateAssayFromXMLNode(Node topNode) throws PubChemException { // String assayDescPath = // "PC-AssaySubmit_assay/PC-AssaySubmit_assay_descr/PC-AssayDescription"; Node assayDescNode = null;/* w w w. j a v a 2 s. co m*/ if (topNode.getName().equals("PC-AssayDescription")) assayDescNode = topNode; else { assayDescNode = topNode.selectSingleNode(".//PC-AssayDescription"); } if (assayDescNode == null) throw new PubChemException( String.format("Cannot find PC-AssayDescription node in provided node %s", topNode.getPath())); Node node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_id"); Integer aid = new Integer(node.getText()); try { PCAssay assay = new PCAssay(); if (aid > 0) assay.setAID(aid); node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_version"); Integer version = new Integer(node.getText()); assay.setVersion(version); node = assayDescNode.selectSingleNode("PC-AssayDescription_revision"); Integer revision = new Integer(node.getText()); assay.setRevision(revision); Node trackingNode = assayDescNode .selectSingleNode("PC-AssayDescription_aid-source/PC-Source/PC-Source_db/PC-DBTracking"); node = trackingNode.selectSingleNode("PC-DBTracking_name"); assay.setSourceName(node.getText()); node = trackingNode.selectSingleNode("PC-DBTracking_source-id/Object-id/Object-id_str"); assay.setExtRegId(node.getText()); // hold until date node = trackingNode.selectSingleNode("PC-DBTracking_date"); if (node != null) { String year = node.selectSingleNode("Date/Date_std/Date-std/Date-std_year").getText(); String month = node.selectSingleNode("Date/Date_std/Date-std/Date-std_month").getText(); String day = node.selectSingleNode("Date/Date_std/Date-std/Date-std_day").getText(); if (DEBUGGING) log.info("year: " + year + " month: " + month + " day: " + day); Calendar calendar = Calendar.getInstance(); calendar.set(Integer.parseInt(year), Integer.parseInt(month) - 1, Integer.parseInt(day)); 
assay.setHoldUntilDate(calendar.getTime()); if (DEBUGGING) log.info(calendar.getTime().toString()); } node = assayDescNode.selectSingleNode("PC-AssayDescription_name"); assay.setName(node.getText()); List<Node> nodes = assayDescNode .selectNodes("PC-AssayDescription_description/PC-AssayDescription_description_E"); assay.setDescription(join(nodes, separator)); nodes = assayDescNode.selectNodes("PC-AssayDescription_protocol/PC-AssayDescription_protocol_E"); assay.setProtocol(join(nodes, separator)); nodes = assayDescNode.selectNodes("PC-AssayDescription_comment/PC-AssayDescription_comment_E"); assay.setComment(join(nodes, separator)); node = assayDescNode.selectSingleNode("PC-AssayDescription_activity-outcome-method"); if (node != null) assay.setActivityOutcomeMethod(node.valueOf("@value")); handlePlots(assay, assayDescNode); node = assayDescNode .selectSingleNode("PC-AssayDescription_grant-number/PC-AssayDescription_grant-number_E"); if (node != null) assay.setGrantNumber(node.getText()); node = assayDescNode.selectSingleNode("PC-AssayDescription_project-category"); if (node != null) assay.setProjectCategory(node.valueOf("@value")); assay.getAssayXRefs().removeAll(assay.getAssayXRefs()); nodes = assayDescNode.selectNodes("PC-AssayDescription_xref/PC-AnnotatedXRef"); handleXRefs(assay, null, nodes); nodes = assayDescNode.selectNodes("PC-AssayDescription_target/PC-AssayTargetInfo"); handleTargetXRefs(assay, null, nodes); handlePanels(assay, assayDescNode); handleColumns(assay, assayDescNode); handleComments(assay, assayDescNode); return assay; } catch (Exception ex) { throw new RuntimeException("Problem with AID " + aid, ex); } }
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
protected void handlePlots(PCAssay assay, Node assayDescNode) { assay.getPlotLabels().clear();// w w w . j ava 2 s. c om List<Node> nodes = assayDescNode.selectNodes("PC-AssayDescription_dr/PC-AssayDRAttr"); for (Node node : nodes) { PCPlotLabel label = new PCPlotLabel(); label.setAssay(assay); label.setPlotLabel(Integer.parseInt(node.selectSingleNode("PC-AssayDRAttr_id").getText())); label.setTitle(node.selectSingleNode("PC-AssayDRAttr_descr").getText()); label.setConcentrationTitle(node.selectSingleNode("PC-AssayDRAttr_dn").getText()); label.setResponseTitle(node.selectSingleNode("PC-AssayDRAttr_rn").getText()); String test = nullSafeGet(node, "PC-AssayDRAttr_type", "text()"); label.setDerivedByEquation(!"".equals(test) & !"0".equals(test) ? true : false); assay.getPlotLabels().add(label); } }
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
/**
 * Rebuilds the assay's categorized comments from the
 * {@code PC-AssayDescription_categorized-comment/PC-CategorizedComment} elements: the comment
 * title becomes the key and the joined comment lines become the value.
 *
 * @param assay the assay whose categorized comments are cleared and repopulated
 * @param assayDescNode the {@code PC-AssayDescription} node to read from
 */
protected void handleComments(PCAssay assay, Node assayDescNode) {
    assay.getCategorizedComments().clear();

    List<Node> commentNodes = assayDescNode
            .selectNodes("PC-AssayDescription_categorized-comment/PC-CategorizedComment");
    for (Node categorized : commentNodes) {
        String title = categorized.selectSingleNode("PC-CategorizedComment_title").getText();
        String text = join(
                categorized.selectNodes("PC-CategorizedComment_comment/PC-CategorizedComment_comment_E"),
                separator);
        assay.getCategorizedComments().put(title, text);
    }
}
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
protected void handlePanels(PCAssay assay, Node assayDescNode) { Node node = assayDescNode.selectSingleNode("PC-AssayDescription_is-panel"); if (node == null) assay.setPanel(false);//from ww w.j ava 2 s . co m else assay.setPanel("true".equals(node.valueOf("@value"))); if (assay.isPanel()) { node = assayDescNode .selectSingleNode("PC-AssayDescription_panel-info/PC-AssayPanel/PC-AssayPanel_name"); assay.setPanelName(node.valueOf("text()")); node = assayDescNode .selectSingleNode("PC-AssayDescription_panel-info/PC-AssayPanel/PC-AssayPanel_descr"); assay.setPanelDescription(node == null ? "" : node.valueOf("text()")); } List<Node> nodes = assayDescNode.selectNodes( "PC-AssayDescription_panel-info/PC-AssayPanel/PC-AssayPanel_member/PC-AssayPanelMember"); for (Node n : nodes) { String mid = n.selectSingleNode("PC-AssayPanelMember_mid").getText(); Node node2 = n.selectSingleNode("PC-AssayPanelMember_name"); String name = node2 == null ? "" : node2.getText(); PCAssayPanel panel = new PCAssayPanel(); panel.setAssay(assay); assay.getPanels().add(panel); panel.setPanelNumber(Integer.parseInt(mid)); panel.setName(name); node2 = n.selectSingleNode("PC-AssayPanelMember_description"); panel.setDescription(node2 == null ? "" : node2.getText()); List<Node> nodes2 = n.selectNodes("PC-AssayPanelMember_protocol/PC-AssayPanelMember_protocol_E"); panel.setProtocol(join(nodes2, separator)); nodes2 = n.selectNodes("PC-AssayPanelMember_comment/PC-AssayPanelMember_comment_E"); panel.setComment(join(nodes2, separator)); List<Node> xrefNodes = n.selectNodes("PC-AssayPanelMember_xref/PC-AnnotatedXRef"); handleXRefs(assay, panel, xrefNodes); List<Node> targetNodes = n.selectNodes("PC-AssayPanelMember_target/PC-AssayTargetInfo"); handleTargetXRefs(assay, panel, targetNodes); } }
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
protected void handleColumns(PCAssay assay, Node assayDescNode) { Map<Integer, PCAssayColumn> map = new HashMap<Integer, PCAssayColumn>(); for (PCAssayColumn col : assay.getColumns()) map.put(col.getTID(), col);//from www . j a v a 2s. c o m ensureColumn(assay, -1, "Outcome", "string"); ensureColumn(assay, 0, "Score", "float"); Map<Integer, PCAssayPanel> mapPanels = new HashMap<Integer, PCAssayPanel>(); for (PCAssayPanel panel : assay.getPanels()) mapPanels.put(panel.getPanelNumber(), panel); List<Node> nodes = assayDescNode.selectNodes("PC-AssayDescription_results/PC-ResultType"); for (Node n : nodes) { String tid = n.selectSingleNode("PC-ResultType_tid").getText(); String name = n.selectSingleNode("PC-ResultType_name").getText(); String type = n.selectSingleNode("PC-ResultType_type").valueOf("@value"); PCAssayColumn column = ensureColumn(assay, Integer.parseInt(tid), name, type); Node node = n.selectSingleNode("PC-ResultType_unit"); if (node != null) column.setUnit(node.valueOf("@value")); List<Node> descNodes = n.selectNodes("PC-ResultType_description/PC-ResultType_description_E"); column.setDescription(join(descNodes, separator)); if (DEBUGGING) log.info("Column description: " + join(descNodes, separator)); node = n.selectSingleNode("PC-ResultType_ac"); if (node != null) column.setActiveConcentration("true".equals(node.valueOf("@value"))); node = n.selectSingleNode("PC-ResultType_tc"); if (node != null) { Node node2 = node.selectSingleNode("PC-ConcentrationAttr/PC-ConcentrationAttr_dr-id"); if (node2 != null) { column.setCurvePlotLabel(Integer.parseInt(node2.getText())); } String testedConc = node.selectSingleNode("PC-ConcentrationAttr/PC-ConcentrationAttr_concentration") .getText(); column.setTestedConcentration(Double.parseDouble(testedConc)); String testedUnit = node.selectSingleNode("PC-ConcentrationAttr/PC-ConcentrationAttr_unit") .valueOf("@value"); column.setTestedConcentrationUnit(testedUnit); } node = 
n.selectSingleNode("PC-ResultType_panel-info/PC-AssayPanelTestResult"); if (node != null) { String panelId = node.selectSingleNode("PC-AssayPanelTestResult_mid").getText(); PCAssayPanel panel = mapPanels.get(Integer.parseInt(panelId)); column.setPanel(panel); String panelColumnType = node.selectSingleNode("PC-AssayPanelTestResult_readout-annot") .valueOf("@value"); column.setPanelReadoutType(panelColumnType); } } }
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
/**
 * Converts each {@code PC-AnnotatedXRef} node into a non-target {@link PCAssayXRef} and adds it
 * to {@code assay.getAssayXRefs()}.
 *
 * <p>The cross-reference type is the part of the data element's name after the last
 * underscore; the database is looked up in {@code targetType} and falls back to the type itself.
 *
 * @param assay the assay that receives the cross-references
 * @param panel the panel the cross-references belong to; passed through to each entry as given
 * @param nodes the {@code PC-AnnotatedXRef} nodes to convert
 */
protected void handleXRefs(PCAssay assay, PCAssayPanel panel, List<Node> nodes) {
    for (Node annotated : nodes) {
        Node commentNode = annotated.selectSingleNode("PC-AnnotatedXRef_comment");
        String comment = "";
        if (commentNode != null) {
            comment = commentNode.getText();
        }

        Node dataNode = annotated.selectSingleNode("PC-AnnotatedXRef_xref/PC-XRefData/*");
        String elementName = dataNode.getName();
        String type = elementName.substring(elementName.lastIndexOf("_") + 1);

        String database = type;
        if (targetType.containsKey(type)) {
            database = targetType.get(type);
        }
        String id = dataNode.getText();

        XRef xref = new XRef();
        xref.setXRefId(id);
        xref.setDatabase(database);
        xref.setType(type);

        PCAssayXRef assayXRef = new PCAssayXRef();
        assayXRef.setPanel(panel);
        assayXRef.setAssay(assay);
        assayXRef.setComment(comment);
        assayXRef.setXRef(xref);
        assayXRef.setTarget(false);

        assay.getAssayXRefs().add(assayXRef);
    }
}
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
/**
 * Evaluates {@code expression} on the single node selected by {@code xpath}.
 *
 * @param node the context node for the {@code xpath} lookup
 * @param xpath XPath selecting the node to evaluate against
 * @param expression XPath expression passed to {@code valueOf} on the selected node
 * @return the value of the expression, or the empty string when {@code xpath} selects nothing
 */
private String nullSafeGet(Node node, String xpath, String expression) {
    Node selected = node.selectSingleNode(xpath);
    return selected == null ? "" : selected.valueOf(expression);
}
From source file:bard.pubchem.xml.PubChemXMLParserFactory.java
License:Open Source License
/**
 * Converts each {@code PC-AssayTargetInfo} node into a target {@link PCAssayXRef} and adds it to
 * {@code assay.getAssayXRefs()}.
 *
 * <p>When the target carries an {@code Org-ref} organism entry, its database tag must be
 * {@code "taxon"}; the taxon id, scientific name and common name are copied onto the
 * cross-reference when the taxon id text is non-empty.
 *
 * @param assay the assay that receives the cross-references
 * @param panel the panel the targets belong to; passed through to each entry as given
 * @param nodes the {@code PC-AssayTargetInfo} nodes to convert
 * @throws RuntimeException if an organism reference uses a database other than "taxon"
 */
protected void handleTargetXRefs(PCAssay assay, PCAssayPanel panel, List<Node> nodes) {
    for (Node targetNode : nodes) {
        String name = targetNode.selectSingleNode("PC-AssayTargetInfo_name").getText();
        String id = targetNode.selectSingleNode("PC-AssayTargetInfo_mol-id").getText();
        String type = targetNode.selectSingleNode("PC-AssayTargetInfo_molecule-type").valueOf("@value");

        String database = type;
        if (targetType.containsKey(type)) {
            database = targetType.get(type);
        }

        String taxonName = "";
        String taxonCommon = "";
        String taxon = "";
        Node orgRef = targetNode.selectSingleNode("PC-AssayTargetInfo_organism/BioSource/BioSource_org/Org-ref");
        if (orgRef != null) {
            taxonName = nullSafeGet(orgRef, "Org-ref_taxname", "text()");
            taxonCommon = nullSafeGet(orgRef, "Org-ref_common", "text()");
            String db = nullSafeGet(orgRef, "Org-ref_db/Dbtag/Dbtag_db", "text()");
            if (!"taxon".equals(db)) {
                throw new RuntimeException("Non taxon BioSource Org-ref_db (was " + db + ")");
            }
            Node taxonIdNode = orgRef.selectSingleNode("Org-ref_db/Dbtag/Dbtag_tag/Object-id/Object-id_id");
            taxon = taxonIdNode.getText();
        }

        XRef xref = new XRef();
        xref.setXRefId(id);
        xref.setDatabase(database);
        xref.setType(type);
        xref.setName(name);

        PCAssayXRef assayXRef = new PCAssayXRef();
        assayXRef.setTarget(true);
        assayXRef.setPanel(panel);
        assayXRef.setAssay(assay);
        assayXRef.setXRef(xref);
        if (taxon.length() > 0) {
            assayXRef.setTaxon(Long.parseLong(taxon));
            assayXRef.setTaxonName(taxonName);
            assayXRef.setTaxonCommon(taxonCommon);
        }

        assay.getAssayXRefs().add(assayXRef);
    }
}