List of usage examples for javax.xml.xpath XPathExpression evaluate
/**
 * Evaluates this XPath expression against the given {@link InputSource} and returns the result.
 *
 * @param source     the XML input the expression is evaluated against
 * @param returnType the requested result type; presumably one of the {@code XPathConstants}
 *                   QNames such as {@code NODESET} or {@code NODE}, as used by the examples
 *                   in this listing — confirm against the JAXP documentation
 * @return the evaluation result, typed as {@code Object}; callers cast it according to
 *         {@code returnType}
 * @throws XPathExpressionException declared by the signature for evaluation failures
 */
public Object evaluate(InputSource source, QName returnType) throws XPathExpressionException;
From source file:com.ephesoft.dcma.util.XMLUtil.java
/**
 * Returns the value of the first child node of the first node matched by an XPath expression.
 *
 * <p>Typical use is reading the text of an element: for {@code <a>hi</a>} and the expression
 * {@code /r/a} the result is {@code "hi"}.
 *
 * @param doc             {@link org.w3c.dom.Document} to evaluate the expression against
 * @param xPathExpression {@link String} XPath expression selecting the node of interest
 * @return the node value of the matched node's first child; an empty string when nothing
 *         matches or when the matched node has no children (for example an empty element).
 *         May be {@code null} when the first child is itself an element, because DOM element
 *         nodes have no node value.
 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
 */
public static String getValueFromXML(final Document doc, final String xPathExpression)
        throws XPathExpressionException {
    final XPath xpath = XPathFactory.newInstance().newXPath();
    final XPathExpression expr = xpath.compile(xPathExpression);
    final NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);

    final Node item = nodes.item(0);
    if (item == null) {
        // No node matched the expression.
        return "";
    }

    // An empty element such as <e/> has no first child; the unguarded dereference used to
    // throw a NullPointerException here.
    final Node firstChild = item.getFirstChild();
    if (firstChild == null) {
        return "";
    }
    return firstChild.getNodeValue();
}
From source file:be.fedict.eid.applet.service.signer.odf.ODFUtil.java
/** * Check if an ODF package is self-contained, i.e. content files don't have * OLE objects linked to external files/*from w w w . ja v a 2s .com*/ * * @param odfUrl * @return * @throws IOException * @throws ParserConfigurationException * @throws SAXException * @throws XPathExpressionException */ public static boolean isSelfContained(URL odfUrl) throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { InputStream odfInputStream = odfUrl.openStream(); List zipEntries = getZipEntriesAsList(odfInputStream); odfInputStream = odfUrl.openStream(); ZipInputStream odfZipInputStream = new ZipInputStream(odfInputStream); ZipEntry zipEntry; XPathFactory factory = XPathFactory.newInstance(); /* Maybe a bit overkill, but implementations can use other prefixes */ ODFNamespaceContext namespaceContext = new ODFNamespaceContext(); XPath xpath = factory.newXPath(); xpath.setNamespaceContext(namespaceContext); XPathExpression expression = xpath.compile("//draw:object/@xlink:href|" + "//draw:object-ole/@xlink:href|" + "//draw:image/@xlink:href|" + "//draw:floating-frame/@xlink:href"); while (null != (zipEntry = odfZipInputStream.getNextEntry())) { if (isContentFile(zipEntry)) { /* TODO: pure SAX is probably more memory-efficient */ Document content = ODFUtil.loadDocument(odfZipInputStream); NodeList nodes = (NodeList) expression.evaluate(content, XPathConstants.NODESET); return checkNodes(nodes, zipEntries); } } return true; }
From source file:gov.tva.sparky.hbase.RestProxy.java
/** * /*from ww w . jav a 2 s . c o m*/ * @param strTablename * @param strRowKey * @param strColumn * @param strQualifier * @return Returns the values from a data cell in HBase. * @throws ParserConfigurationException * @throws SAXException * @throws IOException */ public static byte[] QueryHBaseForCell(String strTablename, String strRowKey, String strColumn, String strQualifier) throws ParserConfigurationException, SAXException, IOException { // Configuration Configuration conf = new Configuration(false); conf.addResource("hadoop-default.xml"); conf.addResource("sparky-site.xml"); int port = conf.getInt("sparky.hbase.restPort", 8092); String uri = conf.get("sparky.hbase.restURI", "http://socdvmhbase"); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); // never forget this! factory.setNamespaceAware(true); DocumentBuilder builder = factory.newDocumentBuilder(); String strRestPath = uri + ":" + port + "/" + strTablename + "/" + strRowKey + "/" + strColumn + ":" + strQualifier; Document doc = null; try { doc = builder.parse(strRestPath); } catch (FileNotFoundException e) { //System.out.println("RestProxy > Exception: ( " + strRestPath + " )"); } if (null == doc) return null; XPathFactory xpath_factory = XPathFactory.newInstance(); XPath xpath = xpath_factory.newXPath(); XPathExpression expr = null; try { expr = xpath.compile("/CellSet/Row/Cell/text()"); } catch (XPathExpressionException e) { // TODO Auto-generated catch block e.printStackTrace(); } Object result = null; try { result = expr.evaluate(doc, XPathConstants.NODESET); } catch (XPathExpressionException e) { // TODO Auto-generated catch block e.printStackTrace(); } NodeList nodes = (NodeList) result; String cell_value = nodes.item(0).getNodeValue(); Base64 decoder = new Base64(); byte[] decodedValue = decoder.decode(cell_value.getBytes()); return decodedValue; }
From source file:com.seer.datacruncher.utils.generic.CommonUtils.java
/**
 * Evaluates an XPath expression against a DOM document and returns every matching node.
 *
 * @param doc             document to search
 * @param xpathExpression XPath expression to compile and evaluate
 * @return the matching nodes as a {@link NodeList}; empty when nothing matches
 * @throws Exception if the expression cannot be compiled or evaluated
 */
public static NodeList readXMLNodes(Document doc, String xpathExpression) throws Exception {
    final XPathFactory xpathFactory = XPathFactory.newInstance();
    final XPathExpression compiled = xpathFactory.newXPath().compile(xpathExpression);
    return (NodeList) compiled.evaluate(doc, XPathConstants.NODESET);
}
From source file:com.seer.datacruncher.utils.generic.CommonUtils.java
/**
 * Parses a schema and collects an XPath expression for every {@code annotation/appinfo}
 * text node whose value equals the requested annotation (for example {@code @partitaiva}
 * or {@code @codicefiscale}).
 *
 * @param xmlSchema  schema content to parse
 * @param annotation annotation value to look for; compared with {@code equals}
 * @return the set of XPath expressions produced by {@code fetchXPathXpressionOfNode} for
 *         the matching nodes; empty when nothing matches
 * @throws javax.xml.parsers.ParserConfigurationException declared by the signature
 * @throws org.xml.sax.SAXException if the schema is not well-formed XML
 * @throws IOException if the schema cannot be read
 * @throws javax.xml.xpath.XPathExpressionException if the lookup expression fails
 */
public static Set<String> parseSchemaAndGetXPathSetForAnnotation(ByteArrayInputStream xmlSchema,
        String annotation)
        throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
    final Document schemaDoc = docBuilder.parse(xmlSchema);
    final NodeList appInfoTexts = (NodeList) xpathInstance
            .compile("//annotation/appinfo/text()")
            .evaluate(schemaDoc, XPathConstants.NODESET);

    final Set<String> matchingPaths = new HashSet<String>();
    for (int idx = 0; idx < appInfoTexts.getLength(); idx++) {
        // Skip appinfo entries that carry a different annotation.
        if (!annotation.equals(appInfoTexts.item(idx).getNodeValue())) {
            continue;
        }
        matchingPaths.add(fetchXPathXpressionOfNode(appInfoTexts.item(idx)));
    }
    return matchingPaths;
}
From source file:edu.virginia.speclab.juxta.author.model.JuxtaXMLParser.java
static public String getIndexBasedXPathForGeneralXPath(String xpathString, String xml) { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); try {// ww w. j a v a2 s . c om factory.setNamespaceAware(false); // ignore the horrible issues of namespacing DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(new InputSource(new StringReader(xml))); XPathFactory xpathFactory = XPathFactory.newInstance(); XPath xpath = xpathFactory.newXPath(); Node root = doc.getFirstChild(); XPathExpression expr = xpath.compile(xpathString); Node node = (Node) expr.evaluate(doc, XPathConstants.NODE); return nodeToSimpleXPath(node, root); } catch (SAXException ex) { } catch (IOException ex) { } catch (XPathExpressionException ex) { } catch (ParserConfigurationException ex) { } return null; }
From source file:com.odoko.solrcli.actions.CrawlPostAction.java
/**
 * Gets all nodes matching an XPath, evaluated with the given node as context.
 *
 * @param n     context node the expression is evaluated against
 * @param xpath XPath expression to compile and evaluate
 * @return the matching nodes; empty when nothing matches
 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
 */
public static NodeList getNodesFromXP(Node n, String xpath) throws XPathExpressionException {
    final XPathExpression compiled = XPathFactory.newInstance().newXPath().compile(xpath);
    final Object matches = compiled.evaluate(n, XPathConstants.NODESET);
    return (NodeList) matches;
}
From source file:com.ephesoft.dcma.util.OCREngineUtil.java
/** * To format HOCR for Tesseract.//from w w w.j av a 2 s . c o m * @param outputFilePath {@link String} * @param actualFolderLocation {@link String} * @param pageId {@link String} * @throws XPathExpressionException if error occurs * @throws TransformerException if error occurs * @throws IOException if error occurs */ public static void formatHOCRForTesseract(final String outputFilePath, final String actualFolderLocation, final String pageId) throws XPathExpressionException, TransformerException, IOException { LOGGER.info("Entering format HOCR for tessearct . outputfilepath : " + outputFilePath); InputStream inputStream = new FileInputStream(outputFilePath); XPathFactory xFactory = new org.apache.xpath.jaxp.XPathFactoryImpl(); XPath xpath = xFactory.newXPath(); XPathExpression pageExpr = xpath.compile("//div[@class=\"ocr_page\"]"); XPathExpression wordExpr = xpath.compile("//span[@class=\"ocr_word\"]"); // Output format supported by Tesseract 3.00 XPathExpression xOcrWordExpr = xpath.compile("//span[@class=\"xocr_word\"]"); // Output format supported by Tesseract 3.01 XPathExpression ocrXWordExpr = xpath.compile("//span[@class=\"ocrx_word\"]"); org.w3c.dom.Document doc2 = null; try { doc2 = XMLUtil.createDocumentFrom(inputStream); } catch (Exception e) { LOGGER.info("Premature end of file for " + outputFilePath + e); } finally { IOUtils.closeQuietly(inputStream); } if (doc2 != null) { LOGGER.info("document is not null."); NodeList wordList = (NodeList) wordExpr.evaluate(doc2, XPathConstants.NODESET); for (int wordNodeIndex = 0; wordNodeIndex < wordList.getLength(); wordNodeIndex++) { setWordNodeTextContent(xOcrWordExpr, ocrXWordExpr, wordList, wordNodeIndex); } NodeList pageList = (NodeList) pageExpr.evaluate(doc2, XPathConstants.NODESET); for (int pageNodeIndex = 0; pageNodeIndex < pageList.getLength(); pageNodeIndex++) { Node pageNode = pageList.item(pageNodeIndex); if (pageNode != null && ((Node) pageNode.getAttributes().getNamedItem(UtilConstants.ID_ATTR)) != 
null) { String pageID = ((Node) pageNode.getAttributes().getNamedItem(UtilConstants.ID_ATTR)) .getTextContent(); wordExpr = xpath.compile("//div[@id='" + pageID + "']//span[@class='ocr_word']"); NodeList wordInPageList = (NodeList) wordExpr.evaluate(pageNode, XPathConstants.NODESET); Node pageNodeClone = pageNode.cloneNode(false); for (int i = 0; i < wordInPageList.getLength(); i++) { pageNodeClone.appendChild(wordInPageList.item(i)); } pageNode.getParentNode().appendChild(pageNodeClone); pageNode.getParentNode().removeChild(pageNode); } } XMLUtil.flushDocumentToFile(doc2.getDocumentElement().getOwnerDocument(), outputFilePath); File tempFile = new File(actualFolderLocation + File.separator + pageId + "_tempFile_hocr.html"); FileUtils.copyFile(new File(outputFilePath), tempFile); XMLUtil.htmlOutputStream(tempFile.getAbsolutePath(), outputFilePath); boolean isTempFileDeleted = tempFile.delete(); if (!isTempFileDeleted) { tempFile.delete(); } } LOGGER.info("Exiting format HOCR for tessearct . outputfilepath : " + outputFilePath); }
From source file:cz.incad.kramerius.virtualcollections.VirtualCollectionsManager.java
public static VirtualCollection doVC(String pid, FedoraAccess fedoraAccess, ArrayList<String> languages) { try {// w w w .j av a 2s . c o m String xPathStr; XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); XPathExpression expr; ArrayList<String> langs = new ArrayList<String>(); if (languages == null || languages.isEmpty()) { String[] ls = KConfiguration.getInstance().getPropertyList("interface.languages"); for (int i = 0; i < ls.length; i++) { String lang = ls[++i]; langs.add(lang); } } else { langs = new ArrayList<String>(languages); } String name = ""; boolean canLeave = true; fedoraAccess.getDC(pid); Document doc = fedoraAccess.getDC(pid); xPathStr = "//dc:title/text()"; expr = xpath.compile(xPathStr); Node node = (Node) expr.evaluate(doc, XPathConstants.NODE); if (node != null) { name = StringEscapeUtils.escapeXml(node.getNodeValue()); } xPathStr = "//dc:type/text()"; expr = xpath.compile(xPathStr); node = (Node) expr.evaluate(doc, XPathConstants.NODE); if (node != null) { canLeave = Boolean.parseBoolean(StringEscapeUtils.escapeXml(node.getNodeValue())); } VirtualCollection vc = new VirtualCollection(name, pid, canLeave); for (String lang : langs) { String dsName = TEXT_DS_PREFIX + lang; String value = IOUtils.readAsString(fedoraAccess.getDataStream(pid, dsName), Charset.forName("UTF8"), true); vc.addDescription(lang, value); } return vc; } catch (Exception vcex) { logger.log(Level.WARNING, "Could not get virtual collection for " + pid + ": " + vcex.toString()); return null; } }
From source file:cz.mzk.editor.server.fedora.utils.FedoraUtils.java
/** * Gets the rdf pids./*from www.java 2 s . com*/ * * @param pid * the pid * @param relation * the relation * @return the rdf pids */ public static ArrayList<String> getRdfPids(String pid, String relation) { ArrayList<String> pids = new ArrayList<String>(); try { String command = configuration.getFedoraHost() + "/get/" + pid + "/" + RELS_EXT_STREAM; InputStream is = RESTHelper.get(command, configuration.getFedoraLogin(), configuration.getFedoraPassword(), true); Document contentDom = XMLUtils.parseDocument(is); XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); String xPathStr = "/RDF/Description/" + relation; XPathExpression expr = xpath.compile(xPathStr); NodeList nodes = (NodeList) expr.evaluate(contentDom, XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { Node childnode = nodes.item(i); if (!childnode.getNodeName().contains("hasModel")) { pids.add(childnode.getNodeName() + " " + childnode.getAttributes().getNamedItem("rdf:resource").getNodeValue().split("/")[1]); } } } catch (Exception e) { LOGGER.error(e.getMessage(), e); } return pids; }