Example usage for javax.xml.xpath XPathFactory newXPath

List of usage examples for javax.xml.xpath XPathFactory newXPath

Introduction

On this page you can find example usages of javax.xml.xpath XPathFactory newXPath.

Prototype

public abstract XPath newXPath();

Source Link

Document

Return a new XPath using the underlying object model determined when the XPathFactory was instantiated.

Usage

From source file:nz.govt.natlib.ndha.wctdpsdepositor.extractor.XPathWctMetsExtractor.java

/**
 * Parses an in-memory WCT METS document and runs every populate step
 * against it.
 *
 * @param wctMets     raw bytes of the METS document
 * @param fileName    name used in the error message and passed to
 *                    <code>populateWctMets</code>
 * @param fileBuilder builder passed to the file-related populate steps
 * @throws RuntimeException wrapping any <code>XPathExpressionException</code>
 *                          raised by one of the steps
 */
public void parseFile(byte[] wctMets, String fileName, FileArchiveBuilder fileBuilder) {
    try {
        InputStream metsStream = populateInputStreamFrom(wctMets);
        Document doc = createXmlDocumentFrom(metsStream);

        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(new WctNamespaceContext());

        preprocess(doc, xpath, fileBuilder);

        // Descriptive metadata
        popualteHarvestDate(doc, xpath);
        populateSeedUrlsFrom(doc, xpath);
        populateTargetName(doc, xpath);
        populateCreatedBy(doc, xpath);
        populateCreationDate(doc, xpath);
        populateProvenanceNote(doc, xpath);
        populateCopyrightStatement(doc, xpath);
        populateCopyrightURL(doc, xpath);
        populateAccessRestrictions(doc, xpath);

        // File groups
        populateArchiveFiles(doc, xpath, fileBuilder);
        populateLogFiles(doc, xpath, fileBuilder);
        populateReportFiles(doc, xpath, fileBuilder);
        populateHomeDirectoryFiles(doc, xpath, fileBuilder);

        // Populate anything additional
        populateAdditional(doc, xpath, fileBuilder);

        // A second stream over the same bytes is handed to populateWctMets
        // (presumably because the first was consumed while building the DOM).
        InputStream rawMetsStream = populateInputStreamFrom(wctMets);
        populateWctMets(rawMetsStream, fileName);

    } catch (XPathExpressionException xpe) {
        String message = "An exception occurred while parsing the WCT METS document for " + fileName;
        throw new RuntimeException(message, xpe);
    }
}

From source file:org.adl.sequencer.ADLSeqUtilities.java

/**
 * Initializes one activity (<code>SeqActivity</code>) that will be added to
 * an activity tree./*from   www .ja v  a 2  s  . co m*/
 * 
 * @param iNode   A node from the DOM tree of an element containing
 *                sequencing information.
 * 
 * @param iColl   The collection of reusable sequencing information.
 * 
 * @return An initialized activity (<code>SeqActivity</code>), or <code>
 *         null</code> if there was an error initializing the activity.
 */
private static SeqActivity buildActivityNode(Node iNode, Node iColl) {

    if (_Debug) {
        System.out.println("  :: ADLSeqUtilities  --> BEGIN - " + "buildActivityNode");
    }

    SeqActivity act = new SeqActivity();

    boolean error = false;

    String tempVal = null;

    // Set the activity's ID -- this is a required attribute
    act.setID(ADLSeqUtilities.getAttribute(iNode, "identifier"));

    // Get the activity's resource ID -- if it exsits
    tempVal = ADLSeqUtilities.getAttribute(iNode, "identifierref");
    if (tempVal != null) {
        if (!isEmpty(tempVal)) {
            act.setResourceID(tempVal);
        }
    }

    // Check if the activity is visible
    tempVal = ADLSeqUtilities.getAttribute(iNode, "isvisible");
    if (tempVal != null) {
        if (!isEmpty(tempVal)) {
            act.setIsVisible((Boolean.valueOf(tempVal)).booleanValue());
        }
    }

    // Get the children elements of this activity 
    NodeList children = iNode.getChildNodes();

    // Initalize this activity from the information in the DOM  
    for (int i = 0; i < children.getLength(); i++) {
        Node curNode = children.item(i);

        // Check to see if this is an element node.
        if (curNode.getNodeType() == Node.ELEMENT_NODE) {
            if (curNode.getLocalName().equals("item")) {

                if (_Debug) {
                    System.out.println("  ::--> Found an <item> element");
                }

                // Initialize the nested activity
                SeqActivity nestedAct = ADLSeqUtilities.buildActivityNode(curNode, iColl);

                // Make sure this activity was created successfully
                if (nestedAct != null) {
                    if (_Debug) {
                        System.out.println("  ::--> Adding child");
                    }

                    act.addChild(nestedAct);

                } else {
                    error = true;
                }
            } else if (curNode.getLocalName().equals("title")) {

                if (_Debug) {
                    System.out.println("  ::--> Found the <title> element");
                }

                act.setTitle(ADLSeqUtilities.getElementText(curNode, null));
            } else if (curNode.getLocalName().equals("sequencing")) {

                if (_Debug) {
                    System.out.println("  ::--> Found the <sequencing> element");
                }

                Node seqInfo = curNode;

                // Check to see if the sequencing information is referenced in 
                // the <sequencingCollection>
                tempVal = ADLSeqUtilities.getAttribute(curNode, "IDRef");
                if (tempVal != null) {
                    // Combine local and global sequencing information
                    // Get the referenced Global sequencing information
                    String search = "imsss:sequencing[@ID='" + tempVal + "']";

                    if (_Debug) {
                        System.out.println("  ::--> Looking for XPATH --> " + search);
                    }

                    // Use the referenced set of sequencing information
                    Node seqGlobal = null;

                    XPathFactory pathFactory = XPathFactory.newInstance();
                    XPath path = pathFactory.newXPath();

                    try {
                        seqGlobal = (Node) path.evaluate(search, iColl, XPathConstants.NODE);
                        //XPathAPI.selectSingleNode(iColl, search);
                    } catch (Exception e) {
                        if (_Debug) {
                            System.out.println("  ::--> ERROR : In transform");
                            e.printStackTrace();
                        }
                    }

                    if (seqGlobal != null) {
                        if (_Debug) {
                            System.out.println("  ::--> FOUND");
                        }
                    } else {
                        if (_Debug) {
                            System.out.println("  ::--> ERROR: Not Found");
                        }

                        seqInfo = null;
                        error = true;
                    }

                    if (!error) {

                        // Clone the global node
                        seqInfo = seqGlobal.cloneNode(true);

                        // Loop through the local sequencing element
                        NodeList seqChildren = curNode.getChildNodes();
                        for (int j = 0; j < seqChildren.getLength(); j++) {

                            Node curChild = seqChildren.item(j);

                            // Check to see if this is an element node.
                            if (curChild.getNodeType() == Node.ELEMENT_NODE) {
                                if (_Debug) {
                                    System.out.println("  ::--> Local definition");
                                    System.out.println("  ::-->   " + j);
                                    System.out.println("  ::-->  <" + curChild.getLocalName() + ">");
                                }

                                // Add this to the global sequencing info
                                try {
                                    seqInfo.appendChild(curChild);
                                } catch (org.w3c.dom.DOMException e) {
                                    if (_Debug) {
                                        System.out.println("  ::--> ERROR: ");
                                        e.printStackTrace();
                                    }

                                    error = true;
                                    seqInfo = null;
                                }
                            }
                        }
                    }
                }

                // If we have an node to look at, extract its sequencing info
                if (seqInfo != null) {
                    // Record this activity's sequencing XML fragment
                    //                  XMLSerializer serializer = new XMLSerializer();

                    // -+- TODO -+-
                    //                  serializer.setNewLine("CR-LF");
                    //                  act.setXMLFragment(serializer.writeToString(seqInfo));

                    // Extract the sequencing information for this activity
                    error = !ADLSeqUtilities.extractSeqInfo(seqInfo, act);

                    if (_Debug) {
                        System.out.println("  ::--> Extracted Sequencing Info");
                    }
                }
            }
        }
    }

    // Make sure this activity either has an associated resource or children
    if (act.getResourceID() == null && !act.hasChildren(true)) {
        // This is not a vaild activity -- ignore it
        error = true;
    }

    // If the activity failed to initialize, clear the variable
    if (error) {
        act = null;
    }

    if (_Debug) {
        System.out.println("  ::--> error == " + error);
        System.out.println("  :: ADLSeqUtilities  --> END   - " + "buildActivityNode");
    }

    return act;
}

From source file:org.ala.documentmapper.FlickrDocumentMapper.java

/**
 * Evaluates <code>xpathString</code> against the document and turns every
 * matched machine tag of the form <code>key=value</code> into triples on
 * <code>parsedDoc</code>.
 *
 * Example expression:
 * <code>/rsp/photo/tags/tag[@machine_tag=1]/@raw[starts-with(., 'taxonomy:binomial')]</code>
 *
 * The rank keyword (binomial, trinomial, genus, ...) is looked up in the
 * part of the tag before the first '=' only, so a keyword that happens to
 * occur inside the value cannot misclassify the tag. If no tag supplied a
 * scientific name, the most specific rank that was seen is added as the
 * scientific name instead.
 *
 * @param parsedDoc   document whose triple collection receives the results
 * @param xmlDocument DOM to evaluate the expression against
 * @param subject     subject used for every triple added
 * @param xpathString XPath selecting the machine tag nodes
 * @throws Exception if the XPath expression cannot be evaluated
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void handleMachineTag(ParsedDocument parsedDoc, Document xmlDocument, String subject,
        String xpathString) throws Exception {
    XPath xpath = XPathFactory.newInstance().newXPath();
    NodeList nodes = (NodeList) xpath.evaluate(xpathString, xmlDocument, XPathConstants.NODESET);

    boolean gotSciName = false;
    String subspecies = null;
    String genus = null;
    String family = null;
    String order = null;
    String suborder = null;
    String kingdom = null;

    for (int i = 0; i < nodes.getLength(); i++) {

        // getNodeValue() is null for node types without a value (e.g. elements).
        String machineTag = nodes.item(i).getNodeValue();
        if (machineTag == null) {
            continue;
        }

        int charIdx = machineTag.indexOf('=');
        if (charIdx <= 0) {
            continue;
        }

        String scientificName = machineTag.substring(charIdx + 1).trim();

        // Locale.ROOT keeps the keyword match independent of the default
        // locale (e.g. Turkish dotless i).
        String tagKey = machineTag.substring(0, charIdx).toLowerCase(java.util.Locale.ROOT);

        if (tagKey.contains("binomial")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.SPECIES.toString(), scientificName));
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), scientificName));
            gotSciName = true;
        } else if (tagKey.contains("trinomial")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.SUBSPECIES.toString(), scientificName));
            subspecies = scientificName;
            //               } else if(machineTag.contains("common")){
            //                  parsedDoc.getTriples().add(new Triple(subject, Predicates.COMMON_NAME.toString(), scientificName));
        } else if (tagKey.contains("genus")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.GENUS.toString(), scientificName));
            genus = scientificName;
        } else if (tagKey.contains("family")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.FAMILY.toString(), scientificName));
            family = scientificName;
        } else if (tagKey.contains("suborder")) {
            // Must be tested before "order": "suborder" contains "order".
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.SUBORDER.toString(), scientificName));
            suborder = scientificName;
        } else if (tagKey.contains("order")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.ORDER.toString(), scientificName));
            order = scientificName;
        } else if (tagKey.contains("kingdom")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.KINGDOM.toString(), scientificName));
            kingdom = scientificName;
        } else if (tagKey.contains("scientific")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), scientificName));
            gotSciName = true;
        } else if (tagKey.contains("country")) {
            parsedDoc.getTriples()
                    .add(new Triple(subject, Predicates.COUNTRY.toString(), scientificName));
        }
    }

    if (!gotSciName) {
        // Fall back to the most specific rank that was supplied.
        String[] fallbacks = { subspecies, genus, family, suborder, order, kingdom };
        for (String fallback : fallbacks) {
            if (fallback != null) {
                parsedDoc.getTriples()
                        .add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), fallback));
                break;
            }
        }
    }
}

From source file:org.ala.documentmapper.XMLDocumentMapper.java

/**
 * Map the fields configured in the supplied <code>mappingList</code>.
 * /*w w w.  j  ava2s. co  m*/
 * @param mappingList
 * @param document
 * @param parsedDoc
 * @param isDublinCore
 */
private void doMapping(List<Mapping> mappingList, Document document, ParsedDocument parsedDoc,
        boolean isDublinCore) {

    XPathFactory factory = XPathFactory.newInstance();
    XPath xpath = factory.newXPath();
    // if (getNamespaceContext() != null)
    // xpath.setNamespaceContext(getNamespaceContext());
    for (Mapping mapping : mappingList) {

        if (mapping.mappingType == MappingType.XPATH) {
            performXPathMapping(document, parsedDoc, isDublinCore, xpath, mapping);
        } else if (mapping.mappingType == MappingType.REGEX) {
            performRegexMapping(document, parsedDoc, isDublinCore, mapping);
        }
    }
}

From source file:org.ala.documentmapper.XMLDocumentMapper.java

/**
 * Uses the supplied xpath to retrieve values
 * /*from  ww w  .j  ava 2 s .co  m*/
 * @param document
 * @param xpathAsString
 * @return
 * @throws Exception
 */
protected List<String> getXPathValues(Document document, String xpathAsString) throws Exception {

    XPathFactory factory = XPathFactory.newInstance();
    XPath xpath = factory.newXPath();

    List<String> extractedValues = new ArrayList<String>();
    NodeList nodes = (NodeList) xpath.evaluate(xpathAsString, document, XPathConstants.NODESET);

    for (int i = 0; i < nodes.getLength(); i++) {
        String value = extractValue(nodes.item(i));
        value = StringUtils.trimToNull(value);
        if (value != null) {
            extractedValues.add(value);
        }
    }
    return extractedValues;
}

From source file:org.ala.documentmapper.XMLDocumentMapper.java

/**
 * Uses the supplied xpath to retrieve values
 * /*from   ww  w. j a  v a2 s .c om*/
 * @param document
 * @param xpathAsString
 * @return
 * @throws Exception
 */
protected String getXPathSingleValue(Document document, String xpathAsString) throws Exception {

    XPathFactory factory = XPathFactory.newInstance();
    XPath xpath = factory.newXPath();

    NodeList nodes = null;

    try {
        nodes = (NodeList) xpath.evaluate(xpathAsString, document, XPathConstants.NODESET);

        for (int i = 0; i < nodes.getLength(); i++) {
            String value = extractValue(nodes.item(i));
            value = StringUtils.trimToNull(value);
            if (value != null) {
                return value;
            }
        }
    } catch (XPathException e) {
        String value = (String) xpath.evaluate(xpathAsString, document, XPathConstants.STRING);
        return value;
    }

    return null;
}

From source file:org.alfresco.po.share.dashlet.AbstractDashlet.java

/**
 * Parses the xml string of the original-title attribute element to get tooltip
 * data for report dashlets.
 *
 * Before parsing, the literal <code>alt="avatar"&gt;</code> is rewritten to
 * a self-closing tag and every literal <code>&lt;br&gt;</code> is removed.
 *
 * @param xml String markup to evaluate against
 * @param element String XPath expression selecting the wanted value
 * @return String result of the expression, or a single space when the
 *         evaluation raises an <code>XPathExpressionException</code>
 */
protected String getElement(String xml, String element) throws Exception {
    String tooltipElement = " ";

    // Both patterns are plain text, so literal replace() is used rather
    // than the regex-based replaceAll().
    String cleanedXml = xml.replace("alt=\"avatar\">", "alt=\"avatar\"/>").replace("<br>", "");

    XPath xpath = XPathFactory.newInstance().newXPath();
    InputSource source = new InputSource(new StringReader(cleanedXml));

    try {
        tooltipElement = (String) xpath.evaluate(element, source, XPathConstants.STRING);
    } catch (XPathExpressionException ee) {
        // Pass the exception itself so the stack trace reaches the log.
        logger.error("Cannot parse xml string", ee);
    }
    return tooltipElement;
}

From source file:org.ambraproject.service.article.FetchArticleServiceImpl.java

/**
 * Returns the ref nodes found in the ref-list of the DOM.
 *
 * The ref-list titled "References" is preferred. When it yields no ref
 * elements, the refs of every ref-list under back are returned instead.
 *
 * @param doc DOM representation of the XML
 * @return NodeList of ref elements (possibly empty)
 * @throws XPathExpressionException if an expression cannot be evaluated
 */
private NodeList getReferenceNodes(Document doc) throws XPathExpressionException {
    XPath xpath = XPathFactory.newInstance().newXPath();

    NodeList titledRefs = (NodeList) xpath.compile("//back/ref-list[title='References']/ref")
            .evaluate(doc, XPathConstants.NODESET);
    if (titledRefs.getLength() != 0) {
        return titledRefs;
    }

    return (NodeList) xpath.compile("//back/ref-list/ref").evaluate(doc, XPathConstants.NODESET);
}

From source file:org.ambraproject.service.article.FetchArticleServiceImpl.java

/**
 * Get references for a given article.
 *
 * Each ref element is copied into a detached fragment before it is
 * queried, so the supplied document is left unmodified and the method can
 * be called repeatedly on the same document. Any exception is logged and
 * the citations gathered up to that point are returned.
 *
 * @param doc article xml; <code>null</code> yields an empty list
 * @return references, one per ref element, in document order
 */
public ArrayList<CitationReference> getReferences(Document doc) {
    ArrayList<CitationReference> list = new ArrayList<CitationReference>();

    if (doc == null) {
        return list;
    }

    try {
        NodeList refList = getReferenceNodes(doc);

        XPath xpath = XPathFactory.newInstance().newXPath();

        XPathExpression typeExpr = xpath.compile("//citation | //nlm-citation | //element-citation");
        XPathExpression titleExpr = xpath.compile("//article-title");
        XPathExpression authorsExpr = xpath.compile("//person-group[@person-group-type='author']/name");
        XPathExpression journalExpr = xpath.compile("//source");
        XPathExpression volumeExpr = xpath.compile("//volume");
        XPathExpression numberExpr = xpath.compile("//label");
        XPathExpression fPageExpr = xpath.compile("//fpage");
        XPathExpression lPageExpr = xpath.compile("//lpage");
        XPathExpression yearExpr = xpath.compile("//year");
        XPathExpression publisherExpr = xpath.compile("//publisher-name");

        for (int i = 0; i < refList.getLength(); i++) {

            CitationReference citation = new CitationReference();

            // The "//" expressions above search the whole tree that holds
            // the context node, so each ref is isolated in its own fragment.
            // A deep copy is used: appending the original node would move
            // it out of the caller's document.
            DocumentFragment df = doc.createDocumentFragment();
            df.appendChild(refList.item(i).cloneNode(true));

            // citation type
            Node typeNode = (Node) typeExpr.evaluate(df, XPathConstants.NODE);
            if (typeNode != null) {
                String citationType = getCitationType(typeNode);
                if (citationType != null) {
                    citation.setCitationType(citationType);
                }
            }

            // title
            String title = firstMatchText(titleExpr, df);
            if (title != null) {
                citation.setTitle(title);
            }

            // authors
            citation.setAuthors(readAuthorNames(authorsExpr, df));

            // journal title
            String journalTitle = firstMatchText(journalExpr, df);
            if (journalTitle != null) {
                citation.setJournalTitle(journalTitle);
            }

            // volume
            String volume = firstMatchText(volumeExpr, df);
            if (volume != null) {
                citation.setVolume(volume);
            }

            // citation number
            String number = firstMatchText(numberExpr, df);
            if (number != null) {
                citation.setNumber(number);
            }

            // citation pages: "first-last", or just "first" when no last page
            String firstPage = firstMatchText(fPageExpr, df);
            String lastPage = firstMatchText(lPageExpr, df);
            if (firstPage != null) {
                citation.setPages(lastPage != null ? firstPage + "-" + lastPage : firstPage);
            }

            // citation year
            String year = firstMatchText(yearExpr, df);
            if (year != null) {
                citation.setYear(year);
            }

            // citation publisher
            String publisher = firstMatchText(publisherExpr, df);
            if (publisher != null) {
                citation.setPublisher(publisher);
            }

            list.add(citation);
        }

    } catch (Exception e) {
        log.error("Error occurred while gathering the citation references.", e);
    }

    return list;

}

/**
 * Returns the text content of the first node matched by the expression, or
 * <code>null</code> when nothing matches.
 */
private static String firstMatchText(XPathExpression expr, Node context) throws XPathExpressionException {
    Node match = (Node) expr.evaluate(context, XPathConstants.NODE);
    return match == null ? null : match.getTextContent();
}

/**
 * Builds one "given-names surname" string per name node matched by the
 * expression.
 */
private static ArrayList<String> readAuthorNames(XPathExpression authorsExpr, Node context)
        throws XPathExpressionException {
    NodeList nameNodes = (NodeList) authorsExpr.evaluate(context, XPathConstants.NODESET);
    ArrayList<String> authors = new ArrayList<String>();
    for (int j = 0; j < nameNodes.getLength(); j++) {
        NodeList nameParts = nameNodes.item(j).getChildNodes();
        String surName = "";
        String givenName = "";
        for (int k = 0; k < nameParts.getLength(); k++) {
            Node namePart = nameParts.item(k);
            if (namePart.getNodeName().equals("surname")) {
                surName = namePart.getTextContent();
            } else if (namePart.getNodeName().equals("given-names")) {
                givenName = namePart.getTextContent();
            }
        }
        authors.add(givenName + " " + surName);
    }
    return authors;
}

From source file:org.ambraproject.service.article.FetchArticleServiceImpl.java

/**
 * Decorates the citation elements of the XML DOM with extra information
 * taken from the supplied cited articles. For every reference whose
 * citation is of type "journal" and whose cited article is valid, an
 * extraCitationInfo element is created, filled in by
 * <code>setExtraCitationInfo</code> and appended to the citation element.
 *
 * @param doc           DOM of the XML
 * @param citedArticles List of CitedArticle persistent objects, matched to
 *                      the references by position
 * @return the same DOM instance, modified in place
 * @throws ApplicationException if the number of references differs from the
 *                              number of cited articles, or an XPath
 *                              expression fails
 */
private Document addExtraCitationInfo(Document doc, List<CitedArticle> citedArticles)
        throws ApplicationException {
    if (citedArticles.isEmpty()) {
        return doc; // This happens in some unit tests.
    }
    try {
        NodeList referenceList = getReferenceNodes(doc);

        // The two collections are paired by index, so their sizes must agree.
        // A mismatch can occur when sortOrder on citedArticle holds duplicate
        // values: sortOrder is not unique, the index on it (article.hbm.xml)
        // keeps only one row per value, and the list then has fewer entries
        // than the XML has references.
        int referenceCount = referenceList.getLength();
        if (referenceCount != citedArticles.size()) {
            String message = String.format("Article has %d citedArticles but %d references",
                    citedArticles.size(), referenceCount);
            throw new ApplicationException(message);
        }

        XPath xpath = XPathFactory.newInstance().newXPath();
        XPathExpression citationExpr = xpath
                .compile("./citation|./nlm-citation|./element-citation|./mixed-citation");

        for (int idx = 0; idx < referenceCount; idx++) {
            Node citationNode = (Node) citationExpr.evaluate(referenceList.item(idx), XPathConstants.NODE);
            CitedArticle citedArticle = citedArticles.get(idx);

            if (citationNode == null) {
                continue;
            }
            if (!"journal".equals(getCitationType(citationNode))) {
                continue;
            }
            if (!citedArticleIsValid(citedArticle)) {
                continue;
            }

            Element extraInfo = doc.createElement("extraCitationInfo");
            setExtraCitationInfo(extraInfo, citedArticle);
            citationNode.appendChild(extraInfo);
        }
    } catch (XPathExpressionException xpee) {
        throw new ApplicationException(xpee);
    }

    return doc;
}