List of usage examples for javax.xml.xpath XPathConstants STRING
QName STRING
To view the source code for javax.xml.xpath XPathConstants STRING, click the Source Link.
The XPath 1.0 string data type.
Maps to Java String.
From source file:sf.net.experimaestro.manager.js.XPMObject.java
/**
 * Evaluates an XPath expression and returns its string value.
 *
 * <p>The {@code xml} argument is first converted with {@code JSUtils.toDOM(null, xml)}.
 * The evaluator is given an {@code NSContext} built from the resulting node, and its
 * function resolver is replaced by an {@code XPMXPathFunctionResolver} wrapping the
 * resolver that was previously installed. When the node is a {@link Document}, the
 * expression is evaluated against its document element; otherwise against the node itself.
 *
 * @param path the XPath expression to compile and evaluate
 * @param xml  the value handed to {@code JSUtils.toDOM}
 * @return the result of the evaluation requested as {@link XPathConstants#STRING}
 * @throws javax.xml.xpath.XPathExpressionException if the expression cannot be compiled or evaluated
 */
static public Object js_xpath(String path, Object xml) throws XPathExpressionException {
    final Node root = (Node) JSUtils.toDOM(null, xml);

    final XPath evaluator = XPathFactory.newInstance().newXPath();
    evaluator.setNamespaceContext(new NSContext(root));

    // Chain to whatever resolver the factory installed by default.
    final XPathFunctionResolver previous = evaluator.getXPathFunctionResolver();
    evaluator.setXPathFunctionResolver(new XPMXPathFunctionResolver(previous));

    final XPathExpression compiled = evaluator.compile(path);

    final Node context;
    if (root instanceof Document) {
        context = ((Document) root).getDocumentElement();
    } else {
        context = root;
    }
    return (String) compiled.evaluate(context, XPathConstants.STRING);
}
From source file:tufts.vue.ds.XMLIngest.java
static void XPathExtract(XmlSchema schema, Document document) { try {// w w w. jav a2 s. c o m XPath xpath = XPathFactory.newInstance().newXPath(); String expression = "/rss/channel/item"; //String expression = "rss/channel/item/title"; errout("Extracting " + expression); // First, obtain the element as a node. //tufts.DocDump.dump(document); Node nodeValue = (Node) xpath.evaluate(expression, document, XPathConstants.NODE); errout(" Node: " + nodeValue); // Next, obtain the element as a String. String stringValue = (String) xpath.evaluate(expression, document, XPathConstants.STRING); System.out.println(" String: " + stringValue); NodeList nodeSet = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET); errout("NodeSet: " + Util.tag(nodeSet) + "; size=" + nodeSet.getLength()); for (int i = 0; i < nodeSet.getLength(); i++) { scanNode(schema, nodeSet.item(i), null, null); } // // Finally, obtain the element as a Number (Double). // Double birthdateDouble = (Double) xpath.evaluate(expression, document, XPathConstants.NUMBER); // System.out.println("Double is: " + birthdateDouble); } catch (XPathExpressionException e) { System.err.println("XPathExpressionException caught..."); e.printStackTrace(); } catch (Throwable t) { t.printStackTrace(); } }
From source file:ua.utility.kfsdbupgrade.MaintainableXMLConversionServiceImpl.java
/** * Does the following://from w w w .j av a 2 s . c o m * <ol> * <li>Recursively calls this method on all child elements of * <code>talist</code> to handle any child lists first * <li>Remove the attributes {@link #SERIALIZATION_ATTRIBUTE} and * {@link #CLASS_ATTRIBUTE} of <code>talist</code></li> * <li>If * <code>//[talist.getNodeName()]/org.apache.ojb.broker.core.proxy.ListProxyDefaultImpl/default/size/</code> * evaluates to a value >1, indicating elements in this list, call * {@link #transformClassNode(Document, Node)} on that element and store to * readd</li> * <li>Remove all child elements of <code>talist</code></li> * <li>Readd list elements calculated and transformed above</li> * </ol> * * @param document * Root {@link Document} * @param xpath * {@link XPath} to use during evaluation * @param talist * {@link Element} to process typed array lists on * @throws XPathExpressionException * @throws ClassNotFoundException * @throws IllegalAccessException * @throws InvocationTargetException * @throws InvocationTargetException * @throws NoSuchMethodException * @throws InstantiationException */ private void handleTypedArrayList(Document document, XPath xpath, Element talist) throws XPathExpressionException, ClassNotFoundException, IllegalAccessException, InvocationTargetException, InvocationTargetException, NoSuchMethodException, InstantiationException { LOGGER.trace("Handling typed array list: " + talist.getNodeName()); XPathExpression getChildTypedArrayLists = xpath .compile(".//*[@class='org.kuali.rice.kns.util.TypedArrayList']"); NodeList nodeList = (NodeList) getChildTypedArrayLists.evaluate(talist, XPathConstants.NODESET); // handle any child lists first for (int i = 0; i < nodeList.getLength(); ++i) { Node item = nodeList.item(i); handleTypedArrayList(document, xpath, (Element) item); } talist.removeAttribute(SERIALIZATION_ATTRIBUTE); talist.removeAttribute(CLASS_ATTRIBUTE); XPathExpression listSizeExpression = xpath.compile("//" + talist.getNodeName() + 
"/org.apache.ojb.broker.core.proxy.ListProxyDefaultImpl/default/size/text()"); String size = (String) listSizeExpression.evaluate(talist, XPathConstants.STRING); List<Node> nodesToAdd = new ArrayList<Node>(); if (StringUtils.isNotBlank(size) && Integer.valueOf(size) > 0) { XPathExpression listTypeExpression = xpath.compile("//" + talist.getNodeName() + "/org.kuali.rice.kns.util.TypedArrayList/default/listObjectType/text()"); String listType = (String) listTypeExpression.evaluate(talist, XPathConstants.STRING); XPathExpression listContentsExpression = xpath.compile("//" + talist.getNodeName() + "/org.apache.ojb.broker.core.proxy.ListProxyDefaultImpl/" + listType); NodeList listContents = (NodeList) listContentsExpression.evaluate(talist, XPathConstants.NODESET); for (int i = 0; i < listContents.getLength(); i++) { Node tempNode = listContents.item(i); transformClassNode(document, tempNode); nodesToAdd.add(tempNode); } } for (Node removeNode = talist.getFirstChild(); removeNode != null;) { Node nextRemoveNode = removeNode.getNextSibling(); talist.removeChild(removeNode); removeNode = nextRemoveNode; } for (Node nodeToAdd : nodesToAdd) { talist.appendChild(nodeToAdd); } }
From source file:ubic.gemma.core.loader.entrez.pubmed.PubMedXMLParser.java
private Node processRecord(BibliographicReference bibRef, Node record) { Node article = null;//from w w w. j a va 2 s.c o m NodeList recordNodes = record.getChildNodes(); for (int p = 0; p < recordNodes.getLength(); p++) { Node item = recordNodes.item(p); if (!(item instanceof Element)) { continue; } String name = item.getNodeName(); switch (name) { case "Article": article = item; break; case "ChemicalList": bibRef.setChemicals(this.extractChemicals(item)); break; case "MeshHeadingList": this.processMESH(item, bibRef); break; case "KeywordList": bibRef.setKeywords(this.extractKeywords(item)); break; case "MedlineJournalInfo": { NodeList jNodes = item.getChildNodes(); for (int q = 0; q < jNodes.getLength(); q++) { Node jitem = jNodes.item(q); if (!(jitem instanceof Element)) { continue; } if (jitem.getNodeName().equals("MedlineTA")) { bibRef.setPublication(XMLUtils.getTextValue((Element) jitem)); } } break; } case "PMID": this.processAccession(bibRef, item); break; case "CommentsCorrectionsList": NodeList jNodes = item.getChildNodes(); for (int q = 0; q < jNodes.getLength(); q++) { Node jitem = jNodes.item(q); if (!(jitem instanceof Element)) { continue; } Node reftype = jitem.getAttributes().getNamedItem("RefType"); if (reftype == null) continue; String reftypeName = ((Attr) reftype).getValue(); PubMedXMLParser.log.debug(reftypeName); if (reftypeName.equals("RetractionIn")) { try { XPathFactory xf = XPathFactory.newInstance(); XPath xpath = xf.newXPath(); XPathExpression xgds = xpath.compile("RefSource/text()"); String ref = (String) xgds.evaluate(jitem, XPathConstants.STRING); xgds = xpath.compile("PMID/text()"); String pmid = (String) xgds.evaluate(jitem, XPathConstants.STRING); String description = "Retracted [In: " + ref + " PMID=" + pmid + "]"; bibRef.setDescription(description); } catch (XPathExpressionException e) { PubMedXMLParser.log.warn( "Error while trying to get details of the retraction: " + e.getMessage(), e); } /* * Such papers also have 
<PublicationType>Retracted Publication</PublicationType> */ } } break; default: log.warn("Unrecognized node name " + name); } } return article; }
From source file:ubic.gemma.core.loader.expression.geo.service.GeoBrowserServiceImpl.java
/** * Take the details string from GEO and make it nice. Add links to series and platforms that are already in gemma. * * @param details XML from eSummary//w w w .java 2s . co m * @return HTML-formatted */ String formatDetails(String details) throws IOException { /* * Bug 2690. There must be a better way. */ details = details.replaceAll("encoding=\"UTF-8\"", ""); try { Document document = EutilFetch.parseStringInputStream(details); String gse = "GSE" + xgse.evaluate(document, XPathConstants.STRING); String title = (String) xtitle.evaluate(document, XPathConstants.STRING); NodeList gpls = (NodeList) xgpls.evaluate(document, XPathConstants.NODESET); String summary = (String) xsummary.evaluate(document, XPathConstants.STRING); StringBuilder buf = new StringBuilder(); buf.append("<div class=\"small\">"); ExpressionExperiment ee = this.expressionExperimentService.findByShortName(gse); if (ee != null) { buf.append("\n<p><strong><a target=\"_blank\" href=\"").append(Settings.getRootContext()) .append("/expressionExperiment/showExpressionExperiment.html?id=").append(ee.getId()) .append("\">").append(gse).append("</a></strong>"); } else { buf.append("\n<p><strong>").append(gse).append(" [new to Gemma]</strong>"); } buf.append("<p>").append(title).append("</p>\n"); buf.append("<p>").append(summary).append("</p>\n"); this.formatArrayDetails(gpls, buf); buf.append("</div>"); details = buf.toString(); // } } catch (ParserConfigurationException | SAXException | XPathExpressionException e) { throw new RuntimeException(e); } return details; }
From source file:ubic.gemma.loader.entrez.pubmed.PubMedXMLParser.java
private Node processRecord(BibliographicReference bibRef, Node record) throws IOException { Node article = null;//from ww w .ja va 2 s .c om NodeList recordNodes = record.getChildNodes(); for (int p = 0; p < recordNodes.getLength(); p++) { Node item = recordNodes.item(p); if (!(item instanceof Element)) { continue; } String name = item.getNodeName(); if (name.equals("Article")) { article = item; } else if (name.equals("ChemicalList")) { bibRef.setChemicals(extractChemicals(item)); } else if (name.equals("MeshHeadingList")) { processMESH(item, bibRef); } else if (name.equals("KeywordList")) { bibRef.setKeywords(extractKeywords(item)); } else if (name.equals("MedlineJournalInfo")) { NodeList jNodes = item.getChildNodes(); for (int q = 0; q < jNodes.getLength(); q++) { Node jitem = jNodes.item(q); if (!(jitem instanceof Element)) { continue; } if (jitem.getNodeName().equals("MedlineTA")) { bibRef.setPublication(XMLUtils.getTextValue((Element) jitem)); } } } else if (name.equals("PMID")) { processAccession(bibRef, item); } else if (name.equals("CommentsCorrectionsList")) { NodeList jNodes = item.getChildNodes(); for (int q = 0; q < jNodes.getLength(); q++) { Node jitem = jNodes.item(q); if (!(jitem instanceof Element)) { continue; } Node reftype = jitem.getAttributes().getNamedItem("RefType"); if (reftype == null) continue; String reftypeName = ((Attr) reftype).getValue(); log.debug(reftypeName); if (reftypeName.equals("RetractionIn")) { try { XPathFactory xf = XPathFactory.newInstance(); XPath xpath = xf.newXPath(); XPathExpression xgds = xpath.compile("RefSource/text()"); String ref = (String) xgds.evaluate(jitem, XPathConstants.STRING); xgds = xpath.compile("PMID/text()"); String pmid = (String) xgds.evaluate(jitem, XPathConstants.STRING); String description = "Retracted [In: " + ref + " PMID=" + pmid + "]"; bibRef.setDescription(description); } catch (XPathExpressionException e) { log.warn("Error while trying to get details of the retraction: " + e.getMessage(), 
e); continue; } /* * Such papers also have <PublicationType>Retracted Publication</PublicationType> */ } } } } return article; }
From source file:ubic.gemma.loader.expression.geo.service.GeoBrowserServiceImpl.java
/** * Take the details string from GEO and make it nice. Add links to series and platforms that are already in gemma. * /*from w w w .ja va 2s .c om*/ * @param details XML from eSummary * @return HTML-formatted * @throws IOException */ protected String formatDetails(String details) throws IOException { try { /* * Bug 2690. There must be a better way. */ details = details.replaceAll("encoding=\"UTF-8\"", ""); DocumentBuilder builder = factory.newDocumentBuilder(); StringInputStream is = new StringInputStream(details); Document document = builder.parse(is); NodeList samples = (NodeList) xsamples.evaluate(document, XPathConstants.NODESET); String gds = (String) xgds.evaluate(document, XPathConstants.STRING); // FIXME, use this. String gse = "GSE" + (String) xgse.evaluate(document, XPathConstants.STRING); String title = (String) xtitle.evaluate(document, XPathConstants.STRING); NodeList gpls = (NodeList) xgpls.evaluate(document, XPathConstants.NODESET); // FIXME get description. String summary = (String) xsummary.evaluate(document, XPathConstants.STRING); StringBuilder buf = new StringBuilder(); buf.append("<div class=\"small\">"); ExpressionExperiment ee = this.expressionExperimentService.findByShortName(gse); if (ee != null) { buf.append( "\n<p><strong><a target=\"_blank\" href=\"/Gemma/expressionExperiment/showExpressionExperiment.html?id=" + ee.getId() + "\">" + gse + "</a></strong>"); } else { buf.append("\n<p><strong>" + gse + " [new to Gemma]</strong>"); } buf.append("<p>" + title + "</p>\n"); buf.append("<p>" + summary + "</p>\n"); formatArrayDetails(gpls, buf); for (int i = 0; i < samples.getLength(); i++) { // samples.item( i ) // FIXME use this. } buf.append("</div>"); details = buf.toString(); // } } catch (ParserConfigurationException e) { throw new RuntimeException(e); } catch (SAXException e) { throw new RuntimeException(e); } catch (XPathExpressionException e) { throw new RuntimeException(e); } return details; }
From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.AbstractIndexEnvironment.java
public void indexIncrementalFromXmlDB(String indexLocationDirectory, String dbHost, int dbPort, String dbPassword, String dbName) throws Exception { // I'm upgrading so the baseline is the current nodes number int countNodes = getCountDocuments(); String driverXml = ""; String connectionString = ""; Collection coll;// www. j a v a2 s . c o m IndexWriter w = null; Map<String, XPathExpression> fieldXpe = new HashMap<String, XPathExpression>(); logger.info("indexIncrementalFromXmlDB(generic) is starting for [{}], and initially I have[{}] ... ", new Object[] { indexId, countNodes }); try { Directory indexTempDirectory = FSDirectory.open(new File(indexLocationDirectory, indexId)); w = openIndex(indexTempDirectory, indexAnalyzer); HierarchicalConfiguration connsConf = (HierarchicalConfiguration) Application.getInstance() .getPreferences().getConfSubset("bs.xmldatabase"); if (null != connsConf) { driverXml = connsConf.getString("driver"); connectionString = connsConf.getString("base") + "://" + dbHost + ":" + dbPort + "/" + dbName; } else { logger.error("bs.xmldatabase Configuration is missing!!"); } logger.debug("connectionString->" + connectionString); coll = DatabaseManager.getCollection(connectionString); XPathQueryService service = (XPathQueryService) coll.getService("XPathQueryService", "1.0"); DocumentInfo source = null; Configuration config = ((SaxonEngine) Application.getAppComponent("SaxonEngine")).trFactory .getConfiguration(); XPath xp = new XPathEvaluator(config); for (FieldInfo field : fields.values()) { fieldXpe.put(field.name, xp.compile(field.path)); logger.debug("Field Path->[{}]", field.path); } // the xmldatabase is not very correct and have memory problem for // queires with huge results, so its necessary to implement our own // iteration mechanism // // // I will collect all the results // ResourceSet set = service.query(this.env.indexDocumentPath); long numberResults = 0; ResourceSet set = service.query("count(" + indexDocumentPath + ")"); if 
(set.getIterator().hasMoreResources()) { numberResults = Integer.parseInt((String) set.getIterator().nextResource().getContent()); } // TODO:######################################Change this after - // this is just a performance test // float percentage=0.1F; // numberResults=Math.round(numberResults * percentage); logger.debug("Number of results->" + numberResults); long pageSizeDefault = 50000; if (numberResults > 1000000) { pageSizeDefault = 1000000; } long pageNumber = 1; int count = 0; Map<String, AttsInfo[]> cacheAtt = new HashMap<String, AttsInfo[]>(); Map<String, XPathExpression> cacheXpathAtt = new HashMap<String, XPathExpression>(); Map<String, XPathExpression> cacheXpathAttValue = new HashMap<String, XPathExpression>(); while ((pageNumber * pageSizeDefault) <= (numberResults + pageSizeDefault - 1)) { // calculate the last hit long pageInit = (pageNumber - 1) * pageSizeDefault + 1; long pageSize = (pageNumber * pageSizeDefault < numberResults) ? pageSizeDefault : (numberResults - pageInit + 1); service = (XPathQueryService) coll.getService("XPathQueryService", "1.0"); // xquery paging using subsequence function long time = System.nanoTime(); // TODO: I'm assuming that there is always an attribute @id in // each element set = service.query("for $x in(subsequence(" + indexDocumentPath + "/@id," + pageInit + "," + pageSize + ")) return string($x)"); double ms = (System.nanoTime() - time) / 1000000d; logger.info("Query XMLDB took ->[{}]", ms); ResourceIterator iter = set.getIterator(); XPath xp2; XPathExpression xpe2; List documentNodes; StringReader reader; // cache of distinct attributes fora each sample group while (iter.hasMoreResources()) { count++; logger.debug("its beeing processed the number ->" + count); String idToProcess = (String) iter.nextResource().getContent(); logger.debug("@id that is being processed->" + idToProcess); // I need to get the sample ResourceSet setid = service.query(indexDocumentPath + "[@id='" + idToProcess + "']"); 
ResourceIterator iterid = setid.getIterator(); while (iterid.hasMoreResources()) { StringBuilder xml = new StringBuilder(); xml.append((String) iterid.nextResource().getContent()); // logger.debug(xml.toString()); reader = new StringReader(xml.toString()); source = config.buildDocument(new StreamSource(reader)); // logger.debug("XML DB->[{}]", // PrintUtils.printNodeInfo((NodeInfo) source, config)); Document d = new Document(); xp2 = new XPathEvaluator(source.getConfiguration()); int position = indexDocumentPath.lastIndexOf("/"); // TODO: I also need to change this String pathRoot = ""; if (position != -1) { pathRoot = indexDocumentPath.substring(position); } else { pathRoot = indexDocumentPath; } // logger.debug("PathRoot->[{}]",pathRoot); xpe2 = xp2.compile(pathRoot); documentNodes = (List) xpe2.evaluate(source, XPathConstants.NODESET); for (Object node : documentNodes) { // logger.debug("XML DB->[{}]",PrintUtils.printNodeInfo((NodeInfo)node,config)); String idElement = (String) fieldXpe.get("id").evaluate(node, XPathConstants.STRING); // I need to see if it already exists // I will also add this document if it is nor marked // as "todelete" Boolean toDelete = (Boolean) fieldXpe.get("delete").evaluate(node, XPathConstants.BOOLEAN); // TODO:######################################Change // this after - this is just a performance test int deletePercentage = 10; toDelete = (count % deletePercentage) == 0 ? 
true : false; logger.debug( "Incremental Update - The document [{}] is being processed and is marked to delete?[{}]", new Object[] { idElement, toDelete }); // I will always try to delete the document (i don't // know if it is new or if it was changed) Term idTerm = new Term("id", idElement.toLowerCase()); int countToDelete = getIndexReader().docFreq(idTerm); if (countToDelete > 0) { // if has more than one, I have to send an email // to warn if (countToDelete > 1) { Application.getInstance().sendEmail(null, null, "BIOSAMPLES ERROR - Incremental Update - Removing more than one document! id-> " + idElement, " documents found:" + countToDelete); // I will launch an exception throw new Exception( "BIOSAMPLES ERROR - Incremental Update - Removing more than one document in incremental update id-> " + idElement + " documents found:" + countToDelete); } logger.debug("The document with id [{}] is being deleted from Lucene", idElement); w.deleteDocuments(idTerm); // need to remove one from the number of // documents count countNodes--; } // the element doesn't exist on GUI else { // if it is marked to delete I will just an // warning email - it's possible that it was // inserted and deleted on the Backend but it // had never been sent to the GUI before if (toDelete) { Application.getInstance().sendEmail(null, null, "BIOSAMPLES WARNING - Incremental Update - Id marked for deletion but the id doesn't exist on the GUI! 
id-> " + idElement, ""); } } // if (toDelete) { // logger.debug( // "The document with id [{}] was marked to deletion so I will not process it", // idElement); // } else { // I just process it is it is not for deletion) if (!toDelete) { try { d = processEntryIndex(node, config, service, fieldXpe); } catch (Exception x) { String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config); logger.error("XML that was being processed when the error occurred DB->[{}]", xmlError); // to avoid the next running to stop // because its not able to delete the // newSetup directory w.close(); throw new Exception("Xml that is being processed:" + xmlError, x); } countNodes++; addIndexDocument(w, d); } } // } documentNodes = null; source = null; reader = null; xml = null; // logger.debug("count->[{}]", countNodes); } } logger.debug("until now it were processed->[{}]", pageNumber * pageSizeDefault); pageNumber++; // if (coll != null) { // try { // // coll.close(); // } catch (Exception e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // } set = null; } setCountDocuments(countNodes); // add metadata to the lucene index Map<String, String> map = new HashMap<String, String>(); map.put("numberDocs", Integer.toString(countNodes)); map.put("date", Long.toString(System.nanoTime())); // logger.debug(Application.getInstance().getComponent("XmlDbConnectionPool").getMetaDataInformation()); // I cannot call directly // getComponent("XmlDbConnectionPool").getMetaDataInformation(), // because I can be working in a did String dbInfo = ((XmlDbConnectionPool) Application.getInstance().getComponent("XmlDbConnectionPool")) .getDBInfo(dbHost, dbPort, dbPassword, dbName); // TODO: I need to put here what I have before - to track all the // changes (old numberDocs + old date + oldDBInfo) map.put("DBInfo", dbInfo + "<BR>##################################################<BR>" + getMetadataInformation()); commitIndex(w, map); } catch (Exception x) { logger.error("Caught an 
exception:", x); w.close(); throw x; } }
From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.AbstractIndexEnvironment.java
public Document processEntryIndex(Object node, Configuration config, XPathQueryService service, Map<String, XPathExpression> fieldXpe) throws Exception { Document luceneDoc = new Document(); XPath xp = new XPathEvaluator(config); for (FieldInfo field : fields.values()) { try {/*from www. ja va 2s .com*/ if (!field.process) { List values = (List) fieldXpe.get(field.name).evaluate(node, XPathConstants.NODESET); for (Object v : values) { if ("integer".equals(field.type)) { addIntIndexField(luceneDoc, field.name, v, field.shouldStore, field.shouldSort); } else if ("date".equals(field.type)) { // todo: // addDateIndexField(d, // field.name, // v); logger.error("Date fields are not supported yet, field [{}] will not be created", field.name); } else if ("boolean".equals(field.type)) { addBooleanIndexField(luceneDoc, field.name, v, field.shouldSort); } else { addIndexField(luceneDoc, field.name, v, field.shouldAnalyze, field.shouldStore, field.shouldSort); } } } else { if (field.name.equalsIgnoreCase("attributes")) { // implement here the biosamples // database sample attributes // logic // TODO: rpe // logger.debug("There is A special treatment for this field->" // + field.name); List values = (List) fieldXpe.get(field.name).evaluate(node, XPathConstants.NODESET); for (Iterator iterator = values.iterator(); iterator.hasNext();) { Object object = (Object) iterator.next(); // logger.debug("attributes->" + object); String valClass = (String) fieldXpe.get("attributeName").evaluate(object, XPathConstants.STRING); //TODO: document this on trac and on website documentations help valClass = valClass.replace(" ", "_").toLowerCase(); //valClass=valClass.toLowerCase(); String valType = (String) fieldXpe.get("attributeType").evaluate(object, XPathConstants.STRING); String valValue = (String) fieldXpe.get("attributeValue").evaluate(object, XPathConstants.STRING); if (!valType.equalsIgnoreCase("integer") && !valType.equalsIgnoreCase("real")) { //TODO: change this value valValue = 
valValue.substring(0, Math.min(valValue.length(), 25)); addIndexField(luceneDoc, "attributes", "=" + valClass + "= " + valValue, true, false, true); } else { valValue = valValue.trim(); int val = 0; if (valValue == null || valValue.equalsIgnoreCase("") || valValue.equalsIgnoreCase("NaN")) { valValue = "0"; } BigDecimal num = new BigDecimal(valValue); num = num.multiply(new BigDecimal(100)); int taux = num.toBigInteger().intValue(); valValue = String.format("%07d", taux); //I need to mantain the spaces for lucene consider different words addIndexField(luceneDoc, "attributes", "=" + valClass + "= " + valValue, true, false, true); } // logger.debug("@class->" + valClass); // logger.debug("@type->" + valType); // logger.debug("text->" + valValue); } } else { // logger.debug("There is NO special treatment for this field->" // + field.name); } } } catch (XPathExpressionException x) { String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config); logger.error("Field being processed->[{}]", field.name); xmlError = "##FIELD BEING PROCESSED##->" + field.name + "\n" + xmlError; logger.error("XPathExpressionException->[{}]", x.getMessage()); logger.error("Caught an exception while indexing expression [" + field.path + "] for document [" + ((NodeInfo) node).getStringValue().substring(0, 20) + "...]", x); throw new Exception("XPathExpressionException Xml:" + xmlError, x); } catch (Exception xe) { String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config); logger.error("Generic Exception->[{}]", xe.getMessage()); throw new Exception("Generic Exception Xml:" + xmlError, xe); } } return luceneDoc; }
From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.AbstractIndexEnvironment.java
public void indexFromXmlDB_FACETS(String indexLocationDirectory, String dbHost, int dbPort, String dbPassword, String dbName) throws Exception { int countNodes = 0; String driverXml = ""; String connectionString = ""; Collection coll;/* w w w . java2s .c o m*/ IndexWriter w = null; Map<String, XPathExpression> fieldXpe = new HashMap<String, XPathExpression>(); try { Directory indexTempDirectory = FSDirectory.open(new File(indexLocationDirectory, indexId)); w = createIndex(indexTempDirectory, indexAnalyzer); Directory taxDir = FSDirectory.open(new File(indexLocationDirectory + "Facets", indexId)); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxDir); CategoryDocumentBuilder docBuilder = new CategoryDocumentBuilder(taxoWriter); HierarchicalConfiguration connsConf = (HierarchicalConfiguration) Application.getInstance() .getPreferences().getConfSubset("bs.xmldatabase"); if (null != connsConf) { // TODO: rpe use the component XmlDatabasePooling driverXml = connsConf.getString("driver"); // I will use the connectionString that was passed by parameter // (in several parameters) connectionString = connsConf.getString("base") + "://" + dbHost + ":" + dbPort + "/" + dbName; } else { logger.error("bs.xmldatabase Configuration is missing!!"); } // I cannot register this database again (this is already registered // on XmlDbConnectionPool Component - // java.nio.channels.OverlappingFileLockException // c = Class.forName(driverXml); // db = (Database) c.newInstance(); // DatabaseManager.registerDatabase(db); logger.debug("connectionString->" + connectionString); coll = DatabaseManager.getCollection(connectionString); XPathQueryService service = (XPathQueryService) coll.getService("XPathQueryService", "1.0"); DocumentInfo source = null; // Loop through all result items // collect all the fields data Configuration config = ((SaxonEngine) Application.getAppComponent("SaxonEngine")).trFactory .getConfiguration(); XPath xp = new XPathEvaluator(config); // 
XPathExpression xpe = xp.compile(this.env.indexDocumentPath); for (FieldInfo field : fields.values()) { fieldXpe.put(field.name, xp.compile(field.path)); logger.debug("Field Path->[{}]", field.path); } // the xmldatabase is not very correct and have memory problem for // queires with huge results, so its necessary to implement our own // iteration mechanism // // // I will collect all the results // ResourceSet set = service.query(this.env.indexDocumentPath); // //TODO rpe // //ResourceSet set = service.query("//Sample"); // logger.debug("Number of results->" + set.getSize()); // long numberResults = set.getSize(); long numberResults = 0; ResourceSet set = service.query("count(" + indexDocumentPath + ")"); if (set.getIterator().hasMoreResources()) { numberResults = Integer.parseInt((String) set.getIterator().nextResource().getContent()); } logger.debug("Number of results->" + numberResults); long pageSizeDefault = 50000; // the samplegroup cannot be big otherwise I will obtain a memory // error ... but the sample must b at least one million because the // paging queries are really slow - we need to balance it // (for samples 1million, for samplegroup 50000) if (numberResults > 1000000) { pageSizeDefault = 1000000; } long pageNumber = 1; int count = 0; Map<String, AttsInfo[]> cacheAtt = new HashMap<String, AttsInfo[]>(); Map<String, XPathExpression> cacheXpathAtt = new HashMap<String, XPathExpression>(); Map<String, XPathExpression> cacheXpathAttValue = new HashMap<String, XPathExpression>(); while ((pageNumber * pageSizeDefault) <= (numberResults + pageSizeDefault - 1)) { // while ((pageNumber<=1)) { // calculate the last hit long pageInit = (pageNumber - 1) * pageSizeDefault + 1; long pageSize = (pageNumber * pageSizeDefault < numberResults) ? 
pageSizeDefault : (numberResults - pageInit + 1); service = (XPathQueryService) coll.getService("XPathQueryService", "1.0"); // xquery paging using subsequence function long time = System.nanoTime(); // /set = // service.query("for $x in(/Biosamples/SampleGroup/Sample/@id) return string($x)"); set = service.query("for $x in(subsequence(" + indexDocumentPath + "/@id," + pageInit + "," + pageSize + ")) return string($x)"); // logger.debug("Number of results of page->" + set.getSize()); double ms = (System.nanoTime() - time) / 1000000d; logger.info("Query XMLDB took ->[{}]", ms); ResourceIterator iter = set.getIterator(); XPath xp2; XPathExpression xpe2; List documentNodes; StringReader reader; // cache of distinct attributes fora each sample group while (iter.hasMoreResources()) { count++; logger.debug("its beeing processed the number ->" + count); String idSample = (String) iter.nextResource().getContent(); logger.debug("idSample->" + idSample); // I need to get the sample ResourceSet setid = service.query(indexDocumentPath + "[@id='" + idSample + "']"); // System.out.println("/Biosamples/SampleGroup/Sample[@id='" // + idSample + "']"); ResourceIterator iterid = setid.getIterator(); List<CategoryPath> sampleCategories = null; while (iterid.hasMoreResources()) { // System.out.println(""); // /xml=(String) iterid.nextResource().getContent(); // /xml=(String) iter.nextResource().getContent(); // logger.debug("xml->"+xml); // /reader = new StringReader(xml); StringBuilder xml = new StringBuilder(); xml.append((String) iterid.nextResource().getContent()); // logger.debug(xml.toString()); reader = new StringReader(xml.toString()); source = config.buildDocument(new StreamSource(reader)); // logger.debug("XML DB->[{}]", // PrintUtils.printNodeInfo((NodeInfo) source, config)); Document d = new Document(); xp2 = new XPathEvaluator(source.getConfiguration()); int position = indexDocumentPath.lastIndexOf("/"); ; String pathRoot = ""; if (position != -1) { pathRoot = 
indexDocumentPath.substring(position); } else { pathRoot = indexDocumentPath; } // logger.debug("PathRoot->[{}]",pathRoot); xpe2 = xp2.compile(pathRoot); // TODO rpe // xpe2 = xp2.compile("/Sample"); documentNodes = (List) xpe2.evaluate(source, XPathConstants.NODESET); for (Object node : documentNodes) { // logger.debug("XML DB->[{}]",PrintUtils.printNodeInfo((NodeInfo)node,config)); for (FieldInfo field : fields.values()) { try { // Configuration // config=doc.getConfiguration(); // I Just have to calculate the Xpath if (!field.process) { List values = (List) fieldXpe.get(field.name).evaluate(node, XPathConstants.NODESET); // logger.debug("Field->[{}] values-> [{}]", // field.name, // values.toString()); for (Object v : values) { if ("integer".equals(field.type)) { addIntIndexField(d, field.name, v, field.shouldStore, field.shouldSort); // Just to test I will put here // one facet for the samples if (field.name.equalsIgnoreCase("samples")) { System.out.println("Value-->" + v.toString()); sampleCategories = new ArrayList<CategoryPath>(); sampleCategories.add(new CategoryPath("samples", v.toString())); } } else if ("date".equals(field.type)) { // todo: addDateIndexField(d, // field.name, // v); logger.error( "Date fields are not supported yet, field [{}] will not be created", field.name); } else if ("boolean".equals(field.type)) { addBooleanIndexField(d, field.name, v, field.shouldSort); } else { addIndexField(d, field.name, v, field.shouldAnalyze, field.shouldStore, field.shouldSort); } } } else { if (field.name.equalsIgnoreCase("attributes")) { // implement here the biosamples // database sample attributes logic // TODO: rpe // logger.debug("There is A special treatment for this field->" // + field.name); List values = (List) fieldXpe.get(field.name).evaluate(node, XPathConstants.NODESET); // XPathExpression // classAtt=xp.compile("@class"); // XPathExpression // typeAtt=xp.compile("@dataType"); // XPathExpression // valueAtt=xp.compile("value"); String groupId = 
(String) fieldXpe.get("samplegroup").evaluate(node, XPathConstants.STRING); String id = (String) fieldXpe.get("accession").evaluate(node, XPathConstants.STRING); // logger.debug(groupId+"$$$" + id); // logger.debug("Field->[{}] values-> [{}]", // field.name, // values.toString()); AttsInfo[] attsInfo = null; if (cacheAtt.containsKey(groupId)) { attsInfo = cacheAtt.get(groupId); } else { logger.debug("No exists cache for samplegroup->" + groupId); // ResourceSet setAtt = // service.query("distinct-values(/Biosamples/SampleGroup[@id='" // + groupId + // "']/Sample/attribute[@dataType!='INTEGER']/replace(@class,' ', '-'))"); // /ResourceSet setAtt = // service.query("distinct-values(/Biosamples/SampleGroup[@id='" // + groupId + // "']/Sample/attribute/replace(@class,' ', '-'))"); // /ResourceSet setAtt = // service.query("distinct-values(/Biosamples/SampleGroup[@id='" // + groupId + // "']/Sample/attribute/@class)"); ResourceSet setAtt = service .query("data(/Biosamples/SampleGroup[@id='" + groupId + "']/SampleAttributes/attribute/@class)"); // logger.debug("->" // + // "/Biosamples/SampleGroup[@id='" // + groupId + // "']/SampleAttributes/attribute/@class"); ResourceIterator resAtt = setAtt.getIterator(); int i = 0; attsInfo = new AttsInfo[(int) setAtt.getSize()]; while (resAtt.hasMoreResources()) { String classValue = (String) resAtt.nextResource().getContent(); // logger.debug("->" // + classValue); // need to use this because // of the use of quotes in // the name of the classes String classValueWitoutQuotes = classValue.replaceAll("\"", "\"\""); // logger.debug("Class value->" // + classValue); XPathExpression xpathAtt = null; XPathExpression xpathAttValue = null; if (cacheXpathAtt.containsKey(classValue)) { xpathAtt = cacheXpathAtt.get(classValue); xpathAttValue = cacheXpathAttValue.get(classValue); } else { xpathAtt = xp.compile("./attribute[@class=\"" + classValueWitoutQuotes + "\"]/@dataType"); xpathAttValue = xp.compile( "attribute[@class=\"" + 
classValueWitoutQuotes + "\"]/value/text()[last()]"); // logger.debug("attribute[@class=\"" // + // classValueWitoutQuotes // + // "\"]//value/text()"); // //xpathAttValue=xp.compile("./attribute[@class=\"" // + // classValueWitoutQuotes // + // "\"]/value[1]/text()"); // logger.debug("./attribute[@class=\"" // + // classValueWitoutQuotes // + // "\"]/value[1]/text()"); cacheXpathAtt.put(classValue, xpathAtt); cacheXpathAttValue.put(classValue, xpathAttValue); } // this doesnt work when the // first sample of sample // group doens have all the // attributes // im using \" becuse there // are some attributes thas // has ' on the name!!! // /ResourceSet setAttType = // service.query("string((/Biosamples/SampleGroup[@id='" // + groupId // +"']/Sample/attribute[@class=replace(\"" // + classValueWitoutQuotes // + // "\",'-',' ')]/@dataType)[1])"); // /ResourceSet setAttType = // service.query("string(/Biosamples/SampleGroup[@id='" // + groupId // +"']/Sample/attribute[@class=\"" // + classValueWitoutQuotes // + "\"]/@dataType)"); ResourceSet setAttType = service .query("data(/Biosamples/SampleGroup[@id='" + groupId + "']/SampleAttributes/attribute[@class=\"" + classValueWitoutQuotes + "\"]/@dataType)"); String dataValue = (String) setAttType.getIterator() .nextResource().getContent(); // logger.debug("Data Type of " // + classValue + " ->" + // dataValue); // String // dataValue=(String)xpathAtt.evaluate(node, // XPathConstants.STRING); AttsInfo attsI = new AttsInfo(classValue, dataValue); // logger.debug("Atttribute->class" // + attsI.name + "->type->" // + attsI.type + "->i" + // i); attsInfo[i] = attsI; // logger.debug("distinct att->" // + value); // cacheAtt.put(groupId, // value); i++; } cacheAtt.put(groupId, attsInfo); // distinctAtt=cacheAtt.get(groupId); // logger.debug("Already exists->" // + distinctAtt); } int len = attsInfo.length; for (int i = 0; i < len; i++) { // logger.debug("$$$$$$->" + // attsInfo[i].name + "$$$$" + // attsInfo[i].type); if 
(!attsInfo[i].type.equalsIgnoreCase("integer") && !attsInfo[i].type.equalsIgnoreCase("real")) { XPathExpression valPath = cacheXpathAttValue .get(attsInfo[i].name); String val = (String) valPath.evaluate(node, XPathConstants.STRING); // logger.debug("$$$$$$->" + // "STRING->" + val + ""); addIndexField(d, (i + 1) + "", val, true, false, true); } else { XPathExpression valPath = cacheXpathAttValue .get(attsInfo[i].name); String valS = (String) valPath.evaluate(node, XPathConstants.STRING); valS = valS.trim(); // logger.debug("Integer->" // + valS); int val = 0; if (valS == null || valS.equalsIgnoreCase("") || valS.equalsIgnoreCase("NaN")) { valS = "0"; } // sort numbers as strings // logger.debug("class->" + // attsInfo[i].name // +"value->##"+ valS + // "##"); BigDecimal num = new BigDecimal(valS); num = num.multiply(new BigDecimal(100)); int taux = num.toBigInteger().intValue(); valS = String.format("%07d", taux); // logger.debug("Integer->" // + valS + "position->" // +(i+1)+"integer"); addIndexField(d, (i + 1) + "", valS, true, false, true); // addIntIndexField(d, // (i+1)+"integer", new // BigInteger(valS),false, // true); // } } } else { // logger.debug("There is NO special treatment for this field->" // + field.name); } } } catch (XPathExpressionException x) { String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config); logger.error("XML DB->[{}]", xmlError); logger.error("Caught an exception while indexing expression [" + field.path + "] for document [" + ((NodeInfo) source).getStringValue().substring(0, 20) + "...]", x); throw new Exception("Xml:" + xmlError, x); } } } documentNodes = null; source = null; reader = null; xml = null; countNodes++; // logger.debug("count->[{}]", countNodes); // facet tests docBuilder.setCategoryPaths(sampleCategories); docBuilder.build(d); addIndexDocument(w, d); } } logger.debug("until now it were processed->[{}]", pageNumber * pageSizeDefault); pageNumber++; if (coll != null) { try { // coll.close(); } catch 
(Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } set = null; } setCountDocuments(countNodes); // add metadata to the lucene index Map<String, String> map = new HashMap<String, String>(); map.put("numberDocs", Integer.toString(countNodes)); map.put("date", Long.toString(System.nanoTime())); // logger.debug(Application.getInstance().getComponent("XmlDbConnectionPool").getMetaDataInformation()); // I cannot call directly // getComponent("XmlDbConnectionPool").getMetaDataInformation(), // because I can be working in a did String dbInfo = ((XmlDbConnectionPool) Application.getInstance().getComponent("XmlDbConnectionPool")) .getDBInfo(dbHost, dbPort, dbPassword, dbName); map.put("DBInfo", dbInfo); // facet taxoWriter.commit(); taxoWriter.close(); commitIndex(w, map); } catch (Exception x) { logger.error("Caught an exception:", x); w.close(); throw x; } }