Example usage for org.dom4j Node selectNodes

List of usage examples for org.dom4j Node selectNodes

Introduction

On this page you can find example usages of org.dom4j Node selectNodes.

Prototype

List<Node> selectNodes(String xpathExpression);

Source Link

Document

selectNodes evaluates an XPath expression and returns the result as a List of Node instances or String instances depending on the XPath expression.

Usage

From source file:SolrUpdate.java

License:Apache License

/**
 * Looks up every PMID currently held in {@code publications} through NCBI efetch and appends
 * one {@code Publication} per returned article (or book article) to {@code publications}.
 *
 * <p>All ids are sent in a single request. Every per-article value is initialised afresh for
 * each record, so a field that is absent from one record is stored as an empty string instead
 * of silently inheriting the value parsed from the previous record.
 *
 * @throws Exception if the efetch response cannot be read or parsed, or if a record's PMID is
 *         missing or not numeric
 */
public void processUrl() throws Exception {
    // Collect the PMIDs already known to this object into one comma-separated id parameter.
    StringBuilder pubmedlist = new StringBuilder();
    @SuppressWarnings("rawtypes")
    Iterator iditer = publications.iterator();
    while (iditer.hasNext()) {
        int currpmid = ((Publication) iditer.next()).getPmid();
        if (pubmedlist.length() > 0) {
            pubmedlist.append(',');
        }
        pubmedlist.append(currpmid);
    }

    String url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" + pubmedlist
            + "&retmax=200&retmode=xml&rettype=abstract";
    Document pubdoc = new SAXReader().read(url);

    @SuppressWarnings("unchecked")
    List<Node> thelist = pubdoc.selectNodes("//PubmedArticle| //PubmedBookArticle");

    for (Node currnode : thelist) {
        Element titlenode = (Element) currnode.selectSingleNode(".//ArticleTitle | .//BookTitle");
        Element yearsnode = (Element) currnode
                .selectSingleNode(".//PubDate/Year | .//DateCompleted/Year | .//DateCreated/Year");
        Element journalname = (Element) currnode.selectSingleNode(".//Journal/Title");
        Element journalyear = (Element) currnode.selectSingleNode(".//PubDate/Year");
        Element journalmonth = (Element) currnode.selectSingleNode(".//PubDate/Month");
        Element journalday = (Element) currnode.selectSingleNode(".//PubDate/Day");
        Element journalvolume = (Element) currnode.selectSingleNode(".//JournalIssue/Volume");
        Element journalissue = (Element) currnode.selectSingleNode(".//JournalIssue/Issue");
        Element journalpagestart = (Element) currnode.selectSingleNode(".//Pagination/MedlinePgn");

        Element epubday = (Element) currnode.selectSingleNode(
                ".//PubMedPubDate[@PubStatus='aheadofprint']/Day  | .//PubMedPubDate[@PubStatus='epublish']/Day ");
        Element epubmonth = (Element) currnode.selectSingleNode(
                ".//PubMedPubDate[@PubStatus='aheadofprint']/Month | .//PubMedPubDate[@PubStatus='epublish']/Month");
        Element epubyear = (Element) currnode.selectSingleNode(
                ".//PubMedPubDate[@PubStatus='aheadofprint']/Year | .//PubMedPubDate[@PubStatus='epublish']/Year");

        Element pubdoi = (Element) currnode.selectSingleNode(".//ArticleId[@IdType='doi']");
        Element abstractnode = (Element) currnode.selectSingleNode(".//Abstract/AbstractText[1]");
        Element pmidnode = (Element) currnode.selectSingleNode(".//PMID");

        // Optional fields: an absent element yields "" for THIS record only.
        String jn = journalname != null ? journalname.getText() : "";
        String jv = journalvolume != null ? journalvolume.getText() : "";
        String ji = journalissue != null ? journalissue.getText() : "";
        String jm = journalmonth != null ? journalmonth.getText() : "";
        String jy = journalyear != null ? journalyear.getText() : "";
        String jsp = journalpagestart != null ? journalpagestart.getText() : "";
        String jd = journalday != null ? journalday.getText() : "";
        String epday = epubday != null ? epubday.getText() : "";
        String epmonth = epubmonth != null ? epubmonth.getText() : "";
        String epyear = epubyear != null ? epubyear.getText() : "";
        String doi = pubdoi != null ? "doi: " + pubdoi.getText() : "";

        // Citation fragment of the form volume(issue):pages. built from whichever parts exist.
        String epubsum2 = "";
        if (jv.length() > 0) {
            epubsum2 += jv;
        }
        if (ji.length() > 0) {
            epubsum2 += "(" + ji + "):";
        }
        if (jsp.length() > 0) {
            epubsum2 += jsp + ".";
        }

        String epubsum;
        if (epmonth.length() < 1 && epyear.length() < 1 && epday.length() < 1) {
            epubsum = "[Epub ahead of print]";
        } else if (epyear.length() > 0) {
            epubsum = "Epub " + epyear + " " + epmonth + " " + epday;
        } else {
            epubsum = "";
        }

        // A record without a title or a usable year no longer aborts the whole batch.
        String mytitle = titlenode != null ? titlenode.getText() : "";
        String myyear = yearsnode != null ? yearsnode.getText() : "";
        int mypmid = Integer.parseInt(pmidnode.getText());

        List<String> mylauthors = new ArrayList<String>();
        List<String> myfauthors = new ArrayList<String>();
        List<String> myfnames = new ArrayList<String>();
        StringBuilder authorfull = new StringBuilder();

        @SuppressWarnings("rawtypes")
        Iterator fiter = currnode.selectNodes(".//ForeName").iterator();
        @SuppressWarnings("rawtypes")
        Iterator liter = currnode.selectNodes(".//LastName").iterator();

        // Fore names and last names are paired by position. Stop at the shorter list so a record
        // with unequal counts cannot throw NoSuchElementException.
        // NOTE(review): positional pairing may mis-align names when an author lacks a ForeName.
        while (fiter.hasNext() && liter.hasNext()) {
            String first = ((Element) fiter.next()).getText();
            String last = ((Element) liter.next()).getText();

            myfauthors.add(first);
            mylauthors.add(last);

            String myfullname = first + " " + last;
            myfnames.add(myfullname);

            if (authorfull.length() > 0) {
                authorfull.append(", ");
            }
            authorfull.append(myfullname);
        }

        String myabstract = abstractnode != null ? abstractnode.getText() : "NO ABSTRACT FOUND.";

        publications.add(new Publication(mytitle, myabstract, myyear, myfauthors, mylauthors, myfnames, jv, jn,
                jy, jm, jd, jsp, ji, epday, epmonth, epyear, doi, epubsum, epubsum2, authorfull.toString(),
                mypmid));
    }
}

From source file:bard.pubchem.xml.PubChemXMLParserFactory.java

License:Open Source License

/**
 * Reads every {@code PC-AssayResults} element reachable from the assay description node and
 * appends one {@code PCAssayResult} per element to {@code assay.getResults()}.
 *
 * @param assay assay that receives the parsed results; its column count sizes the value list
 * @param assayDescriptionNode assay description node; results are located relative to it via
 *        {@code ../../../PC-AssaySubmit_data/PC-AssayResults}
 */
protected void populateAssayResultsFromXML(PCAssay assay, Node assayDescriptionNode) {
    List<Node> resultNodes = assayDescriptionNode
            .selectNodes("../../../PC-AssaySubmit_data/PC-AssayResults");

    // An empty list simply skips the loop, so no separate early return is needed.
    for (Node resultNode : resultNodes) {
        PCAssayResult parsed = new PCAssayResult();

        String sidText = resultNode.selectSingleNode("PC-AssayResults_sid").valueOf("text()");
        parsed.setSID(Long.parseLong(sidText));

        // The outcome is stored with its first letter upper-cased.
        String outcome = resultNode.selectSingleNode("PC-AssayResults_outcome").valueOf("@value");
        String capitalised = outcome.substring(0, 1).toUpperCase() + outcome.substring(1);
        parsed.setOutcome(capitalised);

        String rankText = resultNode.selectSingleNode("PC-AssayResults_rank").valueOf("text()");
        parsed.setRankScore(Integer.parseInt(rankText));

        List<Node> dataNodes = resultNode.selectNodes("PC-AssayResults_data/PC-AssayData");

        int initialCapacity = assay.getColumns().size() - 2;
        List<String> values = GrowthList.decorate(new ArrayList<String>(initialCapacity));
        parsed.setAllValues(values);

        for (Node dataNode : dataNodes) {
            String tidText = dataNode.valueOf("PC-AssayData_tid/text()");
            // The value is stored at position tid - 1.
            int slot = Integer.parseInt(tidText) - 1;
            String cell = dataNode.selectSingleNode(".//*[starts-with(name(),'PC-AssayData_value_')]")
                    .getText();
            values.set(slot, cell);
        }

        assay.getResults().add(parsed);
    }
}

From source file:bard.pubchem.xml.PubChemXMLParserFactory.java

License:Open Source License

/**
 * Builds a {@code PCAssay} from a {@code PC-AssayDescription} element.
 *
 * <p>{@code topNode} may be the description element itself or any ancestor of it; in the latter
 * case the first descendant named {@code PC-AssayDescription} is used. (Original author's note
 * on the usual location:
 * {@code PC-AssaySubmit_assay/PC-AssaySubmit_assay_descr/PC-AssayDescription}.)
 *
 * @param topNode node that is, or contains, the assay description
 * @return the populated assay
 * @throws PubChemException if no {@code PC-AssayDescription} node can be found
 * @throws RuntimeException wrapping any failure that occurs after the AID has been read; the
 *         message names the AID
 */
protected PCAssay populateAssayFromXMLNode(Node topNode) throws PubChemException {
    Node assayDescNode;
    if (topNode.getName().equals("PC-AssayDescription")) {
        assayDescNode = topNode;
    } else {
        assayDescNode = topNode.selectSingleNode(".//PC-AssayDescription");
    }
    if (assayDescNode == null) {
        throw new PubChemException(
                String.format("Cannot find PC-AssayDescription node in provided node %s", topNode.getPath()));
    }

    Node node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_id");
    // Integer.valueOf replaces the deprecated Integer(String) constructor; parsing is identical.
    Integer aid = Integer.valueOf(node.getText());

    try {
        PCAssay assay = new PCAssay();
        if (aid > 0) {
            assay.setAID(aid);
        }

        node = assayDescNode.selectSingleNode("PC-AssayDescription_aid/PC-ID/PC-ID_version");
        Integer version = Integer.valueOf(node.getText());
        assay.setVersion(version);

        node = assayDescNode.selectSingleNode("PC-AssayDescription_revision");
        Integer revision = Integer.valueOf(node.getText());
        assay.setRevision(revision);

        Node trackingNode = assayDescNode
                .selectSingleNode("PC-AssayDescription_aid-source/PC-Source/PC-Source_db/PC-DBTracking");

        node = trackingNode.selectSingleNode("PC-DBTracking_name");
        assay.setSourceName(node.getText());

        node = trackingNode.selectSingleNode("PC-DBTracking_source-id/Object-id/Object-id_str");
        assay.setExtRegId(node.getText());

        // hold until date
        node = trackingNode.selectSingleNode("PC-DBTracking_date");
        if (node != null) {
            String year = node.selectSingleNode("Date/Date_std/Date-std/Date-std_year").getText();
            String month = node.selectSingleNode("Date/Date_std/Date-std/Date-std_month").getText();
            String day = node.selectSingleNode("Date/Date_std/Date-std/Date-std_day").getText();
            if (DEBUGGING) {
                log.info("year: " + year + " month: " + month + " day: " + day);
            }
            // NOTE(review): Calendar.getInstance() starts at "now" and set(y, m, d) leaves the
            // time-of-day fields untouched, so the stored date carries the parse-time clock
            // value. Left unchanged here; confirm whether callers expect midnight.
            Calendar calendar = Calendar.getInstance();
            // Calendar months are zero-based, hence the - 1.
            calendar.set(Integer.parseInt(year), Integer.parseInt(month) - 1, Integer.parseInt(day));
            assay.setHoldUntilDate(calendar.getTime());
            if (DEBUGGING) {
                log.info(calendar.getTime().toString());
            }
        }

        node = assayDescNode.selectSingleNode("PC-AssayDescription_name");
        assay.setName(node.getText());

        List<Node> nodes = assayDescNode
                .selectNodes("PC-AssayDescription_description/PC-AssayDescription_description_E");
        assay.setDescription(join(nodes, separator));

        nodes = assayDescNode.selectNodes("PC-AssayDescription_protocol/PC-AssayDescription_protocol_E");
        assay.setProtocol(join(nodes, separator));

        nodes = assayDescNode.selectNodes("PC-AssayDescription_comment/PC-AssayDescription_comment_E");
        assay.setComment(join(nodes, separator));

        node = assayDescNode.selectSingleNode("PC-AssayDescription_activity-outcome-method");
        if (node != null) {
            assay.setActivityOutcomeMethod(node.valueOf("@value"));
        }

        handlePlots(assay, assayDescNode);

        node = assayDescNode
                .selectSingleNode("PC-AssayDescription_grant-number/PC-AssayDescription_grant-number_E");
        if (node != null) {
            assay.setGrantNumber(node.getText());
        }

        node = assayDescNode.selectSingleNode("PC-AssayDescription_project-category");
        if (node != null) {
            assay.setProjectCategory(node.valueOf("@value"));
        }

        // Drop any existing cross references before re-reading them from the XML.
        assay.getAssayXRefs().clear();

        nodes = assayDescNode.selectNodes("PC-AssayDescription_xref/PC-AnnotatedXRef");
        handleXRefs(assay, null, nodes);

        nodes = assayDescNode.selectNodes("PC-AssayDescription_target/PC-AssayTargetInfo");
        handleTargetXRefs(assay, null, nodes);

        // Panels are handled before columns.
        handlePanels(assay, assayDescNode);

        handleColumns(assay, assayDescNode);

        handleComments(assay, assayDescNode);

        return assay;
    } catch (Exception ex) {
        throw new RuntimeException("Problem with AID " + aid, ex);
    }
}

From source file:bard.pubchem.xml.PubChemXMLParserFactory.java

License:Open Source License

/**
 * Replaces the assay's plot labels with one {@code PCPlotLabel} per
 * {@code PC-AssayDescription_dr/PC-AssayDRAttr} element.
 *
 * @param assay assay whose plot labels are cleared and repopulated
 * @param assayDescNode assay description node to read the dose-response attributes from
 */
protected void handlePlots(PCAssay assay, Node assayDescNode) {
    assay.getPlotLabels().clear();
    List<Node> nodes = assayDescNode.selectNodes("PC-AssayDescription_dr/PC-AssayDRAttr");
    for (Node node : nodes) {
        PCPlotLabel label = new PCPlotLabel();
        label.setAssay(assay);
        label.setPlotLabel(Integer.parseInt(node.selectSingleNode("PC-AssayDRAttr_id").getText()));
        label.setTitle(node.selectSingleNode("PC-AssayDRAttr_descr").getText());
        label.setConcentrationTitle(node.selectSingleNode("PC-AssayDRAttr_dn").getText());
        label.setResponseTitle(node.selectSingleNode("PC-AssayDRAttr_rn").getText());

        // Derived-by-equation is set when the type text is neither empty nor "0".
        // NOTE(review): a null result from nullSafeGet would also count as derived - confirm
        // that helper never returns null.
        String type = nullSafeGet(node, "PC-AssayDRAttr_type", "text()");
        label.setDerivedByEquation(!"".equals(type) && !"0".equals(type));
        assay.getPlotLabels().add(label);
    }
}

From source file:bard.pubchem.xml.PubChemXMLParserFactory.java

License:Open Source License

/**
 * Rebuilds the assay's categorized comments: each {@code PC-CategorizedComment} title becomes
 * a key whose value is that comment's lines joined with {@code separator}.
 *
 * @param assay assay whose categorized comments are cleared and repopulated
 * @param assayDescNode assay description node to read the comments from
 */
protected void handleComments(PCAssay assay, Node assayDescNode) {
    assay.getCategorizedComments().clear();

    List<Node> commentNodes = assayDescNode
            .selectNodes("PC-AssayDescription_categorized-comment/PC-CategorizedComment");
    for (Node commentNode : commentNodes) {
        String title = commentNode.selectSingleNode("PC-CategorizedComment_title").getText();
        List<Node> lineNodes = commentNode
                .selectNodes("PC-CategorizedComment_comment/PC-CategorizedComment_comment_E");
        String text = join(lineNodes, separator);
        assay.getCategorizedComments().put(title, text);
    }
}

From source file:bard.pubchem.xml.PubChemXMLParserFactory.java

License:Open Source License

/**
 * Reads the panel information of an assay: the is-panel flag, the panel name and description
 * (only when the assay is a panel), and one {@code PCAssayPanel} per panel member.
 *
 * <p>Optional text elements that are absent are stored as empty strings.
 *
 * @param assay assay to update; new panels are appended to {@code assay.getPanels()}
 * @param assayDescNode assay description node to read the panel elements from
 */
protected void handlePanels(PCAssay assay, Node assayDescNode) {
    Node node = assayDescNode.selectSingleNode("PC-AssayDescription_is-panel");
    assay.setPanel(node != null && "true".equals(node.valueOf("@value")));

    if (assay.isPanel()) {
        // The name is guarded like the description below, so a panel without a name element
        // no longer fails with a NullPointerException.
        node = assayDescNode
                .selectSingleNode("PC-AssayDescription_panel-info/PC-AssayPanel/PC-AssayPanel_name");
        assay.setPanelName(node == null ? "" : node.valueOf("text()"));
        node = assayDescNode
                .selectSingleNode("PC-AssayDescription_panel-info/PC-AssayPanel/PC-AssayPanel_descr");
        assay.setPanelDescription(node == null ? "" : node.valueOf("text()"));
    }

    List<Node> memberNodes = assayDescNode.selectNodes(
            "PC-AssayDescription_panel-info/PC-AssayPanel/PC-AssayPanel_member/PC-AssayPanelMember");
    for (Node member : memberNodes) {
        String mid = member.selectSingleNode("PC-AssayPanelMember_mid").getText();
        Node optional = member.selectSingleNode("PC-AssayPanelMember_name");
        String name = optional == null ? "" : optional.getText();

        PCAssayPanel panel = new PCAssayPanel();
        panel.setAssay(assay);
        assay.getPanels().add(panel);
        panel.setPanelNumber(Integer.parseInt(mid));
        panel.setName(name);

        optional = member.selectSingleNode("PC-AssayPanelMember_description");
        panel.setDescription(optional == null ? "" : optional.getText());

        List<Node> lineNodes = member
                .selectNodes("PC-AssayPanelMember_protocol/PC-AssayPanelMember_protocol_E");
        panel.setProtocol(join(lineNodes, separator));

        lineNodes = member.selectNodes("PC-AssayPanelMember_comment/PC-AssayPanelMember_comment_E");
        panel.setComment(join(lineNodes, separator));

        List<Node> xrefNodes = member.selectNodes("PC-AssayPanelMember_xref/PC-AnnotatedXRef");
        handleXRefs(assay, panel, xrefNodes);

        List<Node> targetNodes = member.selectNodes("PC-AssayPanelMember_target/PC-AssayTargetInfo");
        handleTargetXRefs(assay, panel, targetNodes);
    }
}

From source file:bard.pubchem.xml.PubChemXMLParserFactory.java

License:Open Source License

/**
 * Makes sure the assay has the fixed "Outcome" (tid -1) and "Score" (tid 0) columns plus one
 * column per {@code PC-ResultType} element, and fills in each column's unit, description,
 * concentration attributes and panel link where the XML provides them.
 *
 * @param assay assay whose columns are created or updated; its panels are looked up by panel
 *        number to link panel columns
 * @param assayDescNode assay description node to read the result types from
 */
protected void handleColumns(PCAssay assay, Node assayDescNode) {
    ensureColumn(assay, -1, "Outcome", "string");
    ensureColumn(assay, 0, "Score", "float");

    Map<Integer, PCAssayPanel> panelsByNumber = new HashMap<Integer, PCAssayPanel>();
    for (PCAssayPanel panel : assay.getPanels()) {
        panelsByNumber.put(panel.getPanelNumber(), panel);
    }

    List<Node> resultTypes = assayDescNode.selectNodes("PC-AssayDescription_results/PC-ResultType");
    for (Node resultType : resultTypes) {
        String tid = resultType.selectSingleNode("PC-ResultType_tid").getText();
        String name = resultType.selectSingleNode("PC-ResultType_name").getText();
        String type = resultType.selectSingleNode("PC-ResultType_type").valueOf("@value");
        PCAssayColumn column = ensureColumn(assay, Integer.parseInt(tid), name, type);

        Node node = resultType.selectSingleNode("PC-ResultType_unit");
        if (node != null) {
            column.setUnit(node.valueOf("@value"));
        }

        List<Node> descNodes = resultType
                .selectNodes("PC-ResultType_description/PC-ResultType_description_E");
        // Join once and reuse the result for both the setter and the debug log.
        String description = join(descNodes, separator);
        column.setDescription(description);
        if (DEBUGGING) {
            log.info("Column description: " + description);
        }

        node = resultType.selectSingleNode("PC-ResultType_ac");
        if (node != null) {
            column.setActiveConcentration("true".equals(node.valueOf("@value")));
        }

        node = resultType.selectSingleNode("PC-ResultType_tc");
        if (node != null) {
            // The dose-response id is optional; concentration and unit are read unconditionally.
            Node drId = node.selectSingleNode("PC-ConcentrationAttr/PC-ConcentrationAttr_dr-id");
            if (drId != null) {
                column.setCurvePlotLabel(Integer.parseInt(drId.getText()));
            }
            String testedConc = node.selectSingleNode("PC-ConcentrationAttr/PC-ConcentrationAttr_concentration")
                    .getText();
            column.setTestedConcentration(Double.parseDouble(testedConc));
            String testedUnit = node.selectSingleNode("PC-ConcentrationAttr/PC-ConcentrationAttr_unit")
                    .valueOf("@value");
            column.setTestedConcentrationUnit(testedUnit);
        }

        node = resultType.selectSingleNode("PC-ResultType_panel-info/PC-AssayPanelTestResult");
        if (node != null) {
            String panelId = node.selectSingleNode("PC-AssayPanelTestResult_mid").getText();
            // An unknown panel number yields null here and is passed on to setPanel as-is.
            PCAssayPanel panel = panelsByNumber.get(Integer.parseInt(panelId));
            column.setPanel(panel);
            String panelColumnType = node.selectSingleNode("PC-AssayPanelTestResult_readout-annot")
                    .valueOf("@value");
            column.setPanelReadoutType(panelColumnType);
        }
    }
}

From source file:bixo.examples.webmining.AnalyzeHtml.java

License:Apache License

/**
 * Utility routine to get back a list of nodes from the HTML page document,
 * which match the provided XPath expression.
 * /*  www  . j  a v a  2s.co  m*/
 * @param xPath expression to match
 * @return array of matching nodes, or an empty array if nothing matches
 * @throws ExtractionException
 */
@SuppressWarnings("unchecked")
private List<Node> getNodes(Node node, String xPath) {
    List<Node> result = node.selectNodes(xPath);
    if (result == null) {
        result = new ArrayList<Node>();
    }

    return result;
}

From source file:cinnamon.global.Conf.java

License:Open Source License

/**
 * Returns the text of every {@code apiClass} element configured for a repository.
 *
 * @param repository repository name, matched against the {@code name} child of a
 *        {@code repository} element
 * @return the configured API class names; empty if the repository is unknown or has no
 *         {@code apiClasses} element
 */
@SuppressWarnings("unchecked")
public List<String> getApiClasses(String repository) {
    List<String> apiClasses = new ArrayList<>();

    // NOTE(review): the name is concatenated into the XPath unescaped, so a value containing
    // a single quote breaks or alters the expression - confirm callers only pass trusted names.
    Node repo = xml.selectSingleNode("//repository[name='" + repository + "']/apiClasses");
    if (repo == null) {
        // No matching repository or no apiClasses element: report "none" instead of an NPE.
        return apiClasses;
    }

    List<Node> apiNodes = repo.selectNodes("apiClass");
    for (Node apiNode : apiNodes) {
        apiClasses.add(apiNode.getText());
    }
    return apiClasses;
}

From source file:com.alefissak.parsers.PlanningParserImpl.java

License:Apache License

/**
 * Parses the receiver nodes below a planning node and adds one {@code AlefissakReceiver} per
 * node to the planning.
 *
 * <p>Each receiver must define a name and a mail element; the vars element is optional.
 *
 * @param planning the planning to fill
 * @param planningNode node containing the receivers
 * @throws AlefissakParsingException if no receiver is defined, or a receiver lacks a name or
 *         a mail element
 */
private void parseReceivers(Planning planning, Node planningNode) throws AlefissakParsingException {

    @SuppressWarnings("unchecked")
    List<Node> receiverNodes = planningNode
            .selectNodes(Constants.CFG_RECEIVERS_TAG_NAME + "/" + Constants.CFG_RECEIVER_TAG_NAME);

    if (receiverNodes.isEmpty()) {
        throw new AlefissakParsingException("You have to specify at least one receiver");
    }

    for (Node receiverNode : receiverNodes) {
        Node nameNode = receiverNode.selectSingleNode(Constants.CFG_NAME_TAG_NAME);
        if (nameNode == null) {
            throw new AlefissakParsingException("Please define all receiver's name");
        }

        AlefissakReceiver receiver = new AlefissakReceiver();
        // getText() already returns a String, so the former toString() call was redundant.
        receiver.setName(nameNode.getText());

        Node mailNode = receiverNode.selectSingleNode(Constants.CFG_MAIL_TAG_NAME);
        if (mailNode == null) {
            throw new AlefissakParsingException("Please define all receiver's mail");
        }
        receiver.setMail(mailNode.getText());

        Node varsNode = receiverNode.selectSingleNode(Constants.CFG_VARS_TAG_NAME);
        if (varsNode != null) {
            setVars(receiver, varsNode.getText());
        }

        planning.addReceiver(receiver);
    }
}