List of usage examples for javax.xml.xpath XPathFactory newXPath
public abstract XPath newXPath();
Return a new XPath
using the underlying object model determined when the XPathFactory was instantiated.
From source file:nz.govt.natlib.ndha.wctdpsdepositor.extractor.XPathWctMetsExtractor.java
public void parseFile(byte[] wctMets, String fileName, FileArchiveBuilder fileBuilder) { try {//from w ww .java 2s . c o m InputStream inputStream = populateInputStreamFrom(wctMets); Document doc = createXmlDocumentFrom(inputStream); XPathFactory xPathFactory = XPathFactory.newInstance(); XPath xpath = xPathFactory.newXPath(); xpath.setNamespaceContext(new WctNamespaceContext()); preprocess(doc, xpath, fileBuilder); popualteHarvestDate(doc, xpath); populateSeedUrlsFrom(doc, xpath); populateTargetName(doc, xpath); populateCreatedBy(doc, xpath); populateCreationDate(doc, xpath); populateProvenanceNote(doc, xpath); populateCopyrightStatement(doc, xpath); populateCopyrightURL(doc, xpath); populateAccessRestrictions(doc, xpath); populateArchiveFiles(doc, xpath, fileBuilder); populateLogFiles(doc, xpath, fileBuilder); populateReportFiles(doc, xpath, fileBuilder); populateHomeDirectoryFiles(doc, xpath, fileBuilder); // Populate anything additional populateAdditional(doc, xpath, fileBuilder); inputStream = populateInputStreamFrom(wctMets); populateWctMets(inputStream, fileName); } catch (XPathExpressionException xpe) { throw new RuntimeException("An exception occurred while parsing the WCT METS document for " + fileName, xpe); } }
From source file:org.adl.sequencer.ADLSeqUtilities.java
/** * Initializes one activity (<code>SeqActivity</code>) that will be added to * an activity tree./*from www .ja v a 2 s . co m*/ * * @param iNode A node from the DOM tree of an element containing * sequencing information. * * @param iColl The collection of reusable sequencing information. * * @return An initialized activity (<code>SeqActivity</code>), or <code> * null</code> if there was an error initializing the activity. */ private static SeqActivity buildActivityNode(Node iNode, Node iColl) { if (_Debug) { System.out.println(" :: ADLSeqUtilities --> BEGIN - " + "buildActivityNode"); } SeqActivity act = new SeqActivity(); boolean error = false; String tempVal = null; // Set the activity's ID -- this is a required attribute act.setID(ADLSeqUtilities.getAttribute(iNode, "identifier")); // Get the activity's resource ID -- if it exsits tempVal = ADLSeqUtilities.getAttribute(iNode, "identifierref"); if (tempVal != null) { if (!isEmpty(tempVal)) { act.setResourceID(tempVal); } } // Check if the activity is visible tempVal = ADLSeqUtilities.getAttribute(iNode, "isvisible"); if (tempVal != null) { if (!isEmpty(tempVal)) { act.setIsVisible((Boolean.valueOf(tempVal)).booleanValue()); } } // Get the children elements of this activity NodeList children = iNode.getChildNodes(); // Initalize this activity from the information in the DOM for (int i = 0; i < children.getLength(); i++) { Node curNode = children.item(i); // Check to see if this is an element node. 
if (curNode.getNodeType() == Node.ELEMENT_NODE) { if (curNode.getLocalName().equals("item")) { if (_Debug) { System.out.println(" ::--> Found an <item> element"); } // Initialize the nested activity SeqActivity nestedAct = ADLSeqUtilities.buildActivityNode(curNode, iColl); // Make sure this activity was created successfully if (nestedAct != null) { if (_Debug) { System.out.println(" ::--> Adding child"); } act.addChild(nestedAct); } else { error = true; } } else if (curNode.getLocalName().equals("title")) { if (_Debug) { System.out.println(" ::--> Found the <title> element"); } act.setTitle(ADLSeqUtilities.getElementText(curNode, null)); } else if (curNode.getLocalName().equals("sequencing")) { if (_Debug) { System.out.println(" ::--> Found the <sequencing> element"); } Node seqInfo = curNode; // Check to see if the sequencing information is referenced in // the <sequencingCollection> tempVal = ADLSeqUtilities.getAttribute(curNode, "IDRef"); if (tempVal != null) { // Combine local and global sequencing information // Get the referenced Global sequencing information String search = "imsss:sequencing[@ID='" + tempVal + "']"; if (_Debug) { System.out.println(" ::--> Looking for XPATH --> " + search); } // Use the referenced set of sequencing information Node seqGlobal = null; XPathFactory pathFactory = XPathFactory.newInstance(); XPath path = pathFactory.newXPath(); try { seqGlobal = (Node) path.evaluate(search, iColl, XPathConstants.NODE); //XPathAPI.selectSingleNode(iColl, search); } catch (Exception e) { if (_Debug) { System.out.println(" ::--> ERROR : In transform"); e.printStackTrace(); } } if (seqGlobal != null) { if (_Debug) { System.out.println(" ::--> FOUND"); } } else { if (_Debug) { System.out.println(" ::--> ERROR: Not Found"); } seqInfo = null; error = true; } if (!error) { // Clone the global node seqInfo = seqGlobal.cloneNode(true); // Loop through the local sequencing element NodeList seqChildren = curNode.getChildNodes(); for (int j = 0; j < 
seqChildren.getLength(); j++) { Node curChild = seqChildren.item(j); // Check to see if this is an element node. if (curChild.getNodeType() == Node.ELEMENT_NODE) { if (_Debug) { System.out.println(" ::--> Local definition"); System.out.println(" ::--> " + j); System.out.println(" ::--> <" + curChild.getLocalName() + ">"); } // Add this to the global sequencing info try { seqInfo.appendChild(curChild); } catch (org.w3c.dom.DOMException e) { if (_Debug) { System.out.println(" ::--> ERROR: "); e.printStackTrace(); } error = true; seqInfo = null; } } } } } // If we have an node to look at, extract its sequencing info if (seqInfo != null) { // Record this activity's sequencing XML fragment // XMLSerializer serializer = new XMLSerializer(); // -+- TODO -+- // serializer.setNewLine("CR-LF"); // act.setXMLFragment(serializer.writeToString(seqInfo)); // Extract the sequencing information for this activity error = !ADLSeqUtilities.extractSeqInfo(seqInfo, act); if (_Debug) { System.out.println(" ::--> Extracted Sequencing Info"); } } } } } // Make sure this activity either has an associated resource or children if (act.getResourceID() == null && !act.hasChildren(true)) { // This is not a vaild activity -- ignore it error = true; } // If the activity failed to initialize, clear the variable if (error) { act = null; } if (_Debug) { System.out.println(" ::--> error == " + error); System.out.println(" :: ADLSeqUtilities --> END - " + "buildActivityNode"); } return act; }
From source file:org.ala.documentmapper.FlickrDocumentMapper.java
@SuppressWarnings({ "unchecked", "rawtypes" }) private void handleMachineTag(ParsedDocument parsedDoc, Document xmlDocument, String subject, String xpathString) throws Exception { //"/rsp/photo/tags/tag[@machine_tag=1]/@raw[starts-with(., 'taxonomy:binomial')]" // String tag = getXPathSingleValue(xmlDocument, xpath); XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); NodeList nodes = (NodeList) xpath.evaluate(xpathString, xmlDocument, XPathConstants.NODESET); boolean gotSciName = false; String subspecies = null;/*from www .j a v a 2 s .co m*/ String genus = null; String family = null; String order = null; String suborder = null; String kingdom = null; for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); String machineTag = node.getNodeValue(); int charIdx = machineTag.indexOf('='); if (charIdx > 0) { String scientificName = machineTag.substring(charIdx + 1); scientificName = scientificName.trim(); if (machineTag != null) { machineTag = machineTag.toLowerCase(); if (machineTag.contains("binomial")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.SPECIES.toString(), scientificName)); parsedDoc.getTriples() .add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), scientificName)); gotSciName = true; } else if (machineTag.contains("trinomial")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.SUBSPECIES.toString(), scientificName)); subspecies = scientificName; // } else if(machineTag.contains("common")){ // parsedDoc.getTriples().add(new Triple(subject, Predicates.COMMON_NAME.toString(), scientificName)); } else if (machineTag.contains("genus")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.GENUS.toString(), scientificName)); genus = scientificName; } else if (machineTag.contains("family")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.FAMILY.toString(), scientificName)); family = scientificName; } else if (machineTag.contains("order")) { 
parsedDoc.getTriples() .add(new Triple(subject, Predicates.ORDER.toString(), scientificName)); order = scientificName; } else if (machineTag.contains("suborder")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.SUBORDER.toString(), scientificName)); suborder = scientificName; } else if (machineTag.contains("kingdom")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.KINGDOM.toString(), scientificName)); kingdom = scientificName; } else if (machineTag.contains("scientific")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), scientificName)); gotSciName = true; } else if (machineTag.contains("country")) { parsedDoc.getTriples() .add(new Triple(subject, Predicates.COUNTRY.toString(), scientificName)); } } } } if (!gotSciName) { if (subspecies != null) { parsedDoc.getTriples().add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), subspecies)); } else if (genus != null) { parsedDoc.getTriples().add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), genus)); } else if (family != null) { parsedDoc.getTriples().add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), family)); } else if (order != null) { parsedDoc.getTriples().add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), order)); } else if (suborder != null) { parsedDoc.getTriples().add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), suborder)); } else if (kingdom != null) { parsedDoc.getTriples().add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), kingdom)); } } }
From source file:org.ala.documentmapper.XMLDocumentMapper.java
/** * Map the fields configured in the supplied <code>mappingList</code>. * /*w w w. j ava2s. co m*/ * @param mappingList * @param document * @param parsedDoc * @param isDublinCore */ private void doMapping(List<Mapping> mappingList, Document document, ParsedDocument parsedDoc, boolean isDublinCore) { XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); // if (getNamespaceContext() != null) // xpath.setNamespaceContext(getNamespaceContext()); for (Mapping mapping : mappingList) { if (mapping.mappingType == MappingType.XPATH) { performXPathMapping(document, parsedDoc, isDublinCore, xpath, mapping); } else if (mapping.mappingType == MappingType.REGEX) { performRegexMapping(document, parsedDoc, isDublinCore, mapping); } } }
From source file:org.ala.documentmapper.XMLDocumentMapper.java
/** * Uses the supplied xpath to retrieve values * /*from ww w .j ava 2 s .co m*/ * @param document * @param xpathAsString * @return * @throws Exception */ protected List<String> getXPathValues(Document document, String xpathAsString) throws Exception { XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); List<String> extractedValues = new ArrayList<String>(); NodeList nodes = (NodeList) xpath.evaluate(xpathAsString, document, XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { String value = extractValue(nodes.item(i)); value = StringUtils.trimToNull(value); if (value != null) { extractedValues.add(value); } } return extractedValues; }
From source file:org.ala.documentmapper.XMLDocumentMapper.java
/** * Uses the supplied xpath to retrieve values * /*from ww w. j a v a2 s .c om*/ * @param document * @param xpathAsString * @return * @throws Exception */ protected String getXPathSingleValue(Document document, String xpathAsString) throws Exception { XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); NodeList nodes = null; try { nodes = (NodeList) xpath.evaluate(xpathAsString, document, XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { String value = extractValue(nodes.item(i)); value = StringUtils.trimToNull(value); if (value != null) { return value; } } } catch (XPathException e) { String value = (String) xpath.evaluate(xpathAsString, document, XPathConstants.STRING); return value; } return null; }
From source file:org.alfresco.po.share.dashlet.AbstractDashlet.java
/**
 * Evaluates an XPath expression against an XML string and returns the result
 * as a string. Used to read tooltip data from the original-title attribute of
 * report dashlets.
 *
 * Before evaluation the markup is patched: the avatar image tag is
 * self-closed and every <code>&lt;br&gt;</code> is removed.
 *
 * @param xml     String the markup to query
 * @param element String the XPath expression to evaluate
 * @return String the evaluation result, or a single space if the expression
 *         could not be evaluated
 */
protected String getElement(String xml, String element) throws Exception {
    final String patchedXml = xml
            .replace("alt=\"avatar\">", "alt=\"avatar\"/>")
            .replace("<br>", "");

    final XPath xpath = XPathFactory.newInstance().newXPath();
    final InputSource source = new InputSource(new StringReader(patchedXml));

    try {
        return (String) xpath.evaluate(element, source, XPathConstants.STRING);
    } catch (XPathExpressionException ee) {
        logger.error("Cannot parse xml string " + ee);
        return " ";
    }
}
From source file:org.ambraproject.service.article.FetchArticleServiceImpl.java
/**
 * Returns the <code>ref</code> nodes from the reference list of the DOM.
 *
 * Reference lists titled exactly "References" are tried first; if they
 * contain no <code>ref</code> elements, the <code>ref</code> elements of every
 * <code>ref-list</code> under <code>back</code> are returned instead.
 *
 * @param doc DOM representation of the XML
 * @return NodeList of ref elements (possibly empty)
 * @throws XPathExpressionException if an expression cannot be evaluated
 */
private NodeList getReferenceNodes(Document doc) throws XPathExpressionException {
    final XPath xpath = XPathFactory.newInstance().newXPath();

    // Tried in order; the first expression that selects anything wins.
    final String[] candidates = {
        "//back/ref-list[title='References']/ref",
        "//back/ref-list/ref"
    };

    NodeList refs = null;
    for (final String candidate : candidates) {
        final XPathExpression compiled = xpath.compile(candidate);
        refs = (NodeList) compiled.evaluate(doc, XPathConstants.NODESET);
        if (refs.getLength() != 0) {
            break;
        }
    }
    return refs;
}
From source file:org.ambraproject.service.article.FetchArticleServiceImpl.java
/** * Get references for a given article// w w w .j a v a 2s. c o m * * @param doc article xml * @return references */ public ArrayList<CitationReference> getReferences(Document doc) { ArrayList<CitationReference> list = new ArrayList<CitationReference>(); if (doc == null) { return list; } try { NodeList refList = getReferenceNodes(doc); XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); XPathExpression typeExpr = xpath.compile("//citation | //nlm-citation | //element-citation"); XPathExpression titleExpr = xpath.compile("//article-title"); XPathExpression authorsExpr = xpath.compile("//person-group[@person-group-type='author']/name"); XPathExpression journalExpr = xpath.compile("//source"); XPathExpression volumeExpr = xpath.compile("//volume"); XPathExpression numberExpr = xpath.compile("//label"); XPathExpression fPageExpr = xpath.compile("//fpage"); XPathExpression lPageExpr = xpath.compile("//lpage"); XPathExpression yearExpr = xpath.compile("//year"); XPathExpression publisherExpr = xpath.compile("//publisher-name"); for (int i = 0; i < refList.getLength(); i++) { Node refNode = refList.item(i); CitationReference citation = new CitationReference(); DocumentFragment df = doc.createDocumentFragment(); df.appendChild(refNode); // citation type Object resultObj = typeExpr.evaluate(df, XPathConstants.NODE); Node resultNode = (Node) resultObj; if (resultNode != null) { String citationType = getCitationType(resultNode); if (citationType != null) { citation.setCitationType(citationType); } } // title resultObj = titleExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { citation.setTitle(resultNode.getTextContent()); } // authors resultObj = authorsExpr.evaluate(df, XPathConstants.NODESET); NodeList resultNodeList = (NodeList) resultObj; ArrayList<String> authors = new ArrayList<String>(); for (int j = 0; j < resultNodeList.getLength(); j++) { Node nameNode = resultNodeList.item(j); NodeList 
namePartList = nameNode.getChildNodes(); String surName = ""; String givenName = ""; for (int k = 0; k < namePartList.getLength(); k++) { Node namePartNode = namePartList.item(k); if (namePartNode.getNodeName().equals("surname")) { surName = namePartNode.getTextContent(); } else if (namePartNode.getNodeName().equals("given-names")) { givenName = namePartNode.getTextContent(); } } authors.add(givenName + " " + surName); } citation.setAuthors(authors); // journal title resultObj = journalExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { citation.setJournalTitle(resultNode.getTextContent()); } // volume resultObj = volumeExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { citation.setVolume(resultNode.getTextContent()); } // citation number resultObj = numberExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { citation.setNumber(resultNode.getTextContent()); } // citation pages String firstPage = null; String lastPage = null; resultObj = fPageExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { firstPage = resultNode.getTextContent(); } resultObj = lPageExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { lastPage = resultNode.getTextContent(); } if (firstPage != null) { if (lastPage != null) { citation.setPages(firstPage + "-" + lastPage); } else { citation.setPages(firstPage); } } // citation year resultObj = yearExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { citation.setYear(resultNode.getTextContent()); } // citation publisher resultObj = publisherExpr.evaluate(df, XPathConstants.NODE); resultNode = (Node) resultObj; if (resultNode != null) { citation.setPublisher(resultNode.getTextContent()); } list.add(citation); } } catch (Exception e) { log.error("Error occurred while gathering the citation 
references.", e); } return list; }
From source file:org.ambraproject.service.article.FetchArticleServiceImpl.java
/** * Decorates the citation elements of the XML DOM with extra information from the citedArticle table in the DB. An * extraCitationInfo element is appended to each citation element. It will contain between one and two attributes * with the extra info: citedArticleID, the DB primary key, and doi, the DOI string, if it exists. * * @param doc DOM of the XML//from www. jav a 2 s .c om * @param citedArticles List of CitedArticle persistent objects * @return modified DOM * @throws ApplicationException */ private Document addExtraCitationInfo(Document doc, List<CitedArticle> citedArticles) throws ApplicationException { if (citedArticles.isEmpty()) { return doc; // This happens in some unit tests. } try { NodeList referenceList = getReferenceNodes(doc); // If sortOrder on citedArticle has duplicate value, you will get below error.Ideally it should not happen // but since sortOrder is not unique it may be possible to update that field from backend to have duplicate value // Now index is on sortOrder(article.hbm.xml), index will be only on one of those of duplicate value and // hence citedArticle will have less count then the xml. 
if (referenceList.getLength() != citedArticles.size()) { throw new ApplicationException(String.format("Article has %d citedArticles but %d references", citedArticles.size(), referenceList.getLength())); } XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); XPathExpression citationExpr = xpath .compile("./citation|./nlm-citation|./element-citation|./mixed-citation"); for (int i = 0; i < referenceList.getLength(); i++) { Node referenceNode = referenceList.item(i); Node citationNode = (Node) citationExpr.evaluate(referenceNode, XPathConstants.NODE); CitedArticle citedArticle = citedArticles.get(i); if (citationNode != null && "journal".equals(getCitationType(citationNode)) && citedArticleIsValid(citedArticle)) { Element extraInfo = doc.createElement("extraCitationInfo"); setExtraCitationInfo(extraInfo, citedArticle); citationNode.appendChild(extraInfo); } } } catch (XPathExpressionException xpee) { throw new ApplicationException(xpee); } return doc; }