List of usage examples for org.dom4j DocumentHelper parseText
public static Document parseText(String text) throws DocumentException
parseText
Parses the given text as an XML document and returns the newly created Document.
From source file:MyLibrary.DoExchangeRate.java
/**
 * Fetches the Yahoo Finance "allcurrencies" quote document and derives an
 * exchange rate for the given currency from it.
 *
 * <p>The first {@code resource} element whose first {@code field} child has
 * the text {@code "USD/" + sourceCurrency} is selected; the result is
 * {@code 1.00} divided by the numeric text of that resource's second
 * {@code field} child.
 *
 * <p>NOTE(review): the response comes from a remote server and is handed
 * straight to {@code DocumentHelper.parseText}; confirm that the dom4j version
 * in use disables DTDs / external entities (XXE).
 *
 * @param sourceCurrency currency code appended to {@code "USD/"} to select the quote
 * @return {@code 1.00 / price} of the matching quote
 * @throws IllegalStateException if no matching quote, or no price field for it, is present
 * @throws Exception if the HTTP request or the XML parsing fails
 */
@SuppressWarnings("unchecked")
public double getExchangeRateByYahooApi(String sourceCurrency) throws Exception {
    String url = "http://finance.yahoo.com/webservice/v1/symbols/allcurrencies/quote";
    // NOTE(review): meaning of the third argument (2) is defined by DoHttpRequest1 - confirm.
    String httpResult = DoHttpRequest1.doGet(url, "utf-8", 2);

    Document document = DocumentHelper.parseText(httpResult);
    List<Node> resources = document.getRootElement().selectNodes("//resource");

    String wantedName = "USD/" + sourceCurrency;
    for (Node resource : resources) {
        Node nameField = resource.selectSingleNode("field");
        // Resources without a name field cannot match; skip them instead of failing with an NPE.
        if (nameField == null || !wantedName.equals(nameField.getText())) {
            continue;
        }
        Node priceField = resource.selectSingleNode("field[2]");
        if (priceField == null) {
            throw new IllegalStateException("Quote " + wantedName + " has no price field");
        }
        return 1.00 / Double.parseDouble(priceField.getText());
    }
    // Previously an empty result surfaced as an ArrayIndexOutOfBoundsException.
    throw new IllegalStateException("No quote found for " + wantedName);
}
From source file:nl.knaw.dans.common.fedora.fox.DatastreamVersion.java
License:Apache License
/**
 * Parses the given XML string and stores the root element of the resulting
 * document, wrapped in an {@code XMLContent}, as this version's xml content.
 *
 * @param xmlString the XML text to parse
 * @throws DocumentException if the text cannot be parsed
 */
public void setXmlContent(String xmlString) throws DocumentException {
    this.xmlContent = new XMLContent(DocumentHelper.parseText(xmlString).getRootElement());
}
From source file:nl.knaw.dans.dccd.application.services.DccdSearchService.java
License:Apache License
/** Transform the given foxml to a Solr indexing document * * @param foxml The xml of the fodora object (indexed) * @return The xml document (for updating the Solr index) * @throws SearchServiceException//w ww . j a v a 2 s . c o m */ private Document transformFoxml(String foxml) throws SearchServiceException { if (foxml == null) throw new IllegalArgumentException(); // get the xslt to transform with, specific for Solr final String DCCD_TO_SOLR_XSLT_FILENAME = "dccdToSolr.xslt"; ClassLoader loader = Thread.currentThread().getContextClassLoader(); URL xsltUrl = loader.getResource(DCCD_TO_SOLR_XSLT_FILENAME); logger.info("Transform foxml to indexing document using " + DCCD_TO_SOLR_XSLT_FILENAME); // transform foxml with xslt Document document = null; Transformer transformer = null; Document transformedDoc = null; TransformerFactory transformerFactory = TransformerFactory.newInstance(); try { document = DocumentHelper.parseText(foxml); //transformer = transformerFactory.newTransformer( new StreamSource(xsltStr ) ); transformer = transformerFactory.newTransformer(new StreamSource(xsltUrl.getFile())); DocumentSource source = new DocumentSource(document); DocumentResult result = new DocumentResult(); transformer.transform(source, result); transformedDoc = result.getDocument(); } catch (TransformerConfigurationException e) { // ? should not happen ? throw new SearchServiceException(e); } catch (DocumentException e) { throw new SearchServiceException(e); } catch (TransformerException e) { throw new SearchServiceException(e); } // print transformedDoc /* try { OutputFormat format = OutputFormat.createPrettyPrint(); System.out.println("xml:\n"); XMLWriter writer = new XMLWriter( System.out, format ); writer.write( transformedDoc ); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } */ return transformedDoc; }
From source file:nl.knaw.dans.dccd.application.services.DccdSearchService.java
License:Apache License
/** * Search the repository using the Solr indexer * Use offset and limit for paging the results * * Note: FedoraRepositoryService.getSearchResultAsXMLString() * searches Fedora and does not using Solr! * * @param query The query to search for (Solr) * @param offset Zero base index ofsset/* w w w. ja v a 2 s . c o m*/ * @param limit maximum number of resulting items to return * @return * @throws SearchServiceException */ public List<String> simpleSearch(String query, int offset, int limit) throws SearchServiceException { // max results should be bigger than 0 if (limit < 1) throw new IllegalArgumentException("limit must be 1 or bigger"); if (offset < 0) throw new IllegalArgumentException("offset must be 0 or bigger"); List<String> result = new ArrayList<String>(); // empty list Properties settings = DccdConfigurationService.getService().getSettings(); final String protocol = settings.getProperty("solr.protocol"); final String host = settings.getProperty("solr.host"); final int port = Integer.parseInt(settings.getProperty("solr.port")); final String context = settings.getProperty("solr.context"); // use Solr to find project with the given query // the REST interface? //final int limit = 100; //String protocol = "http"; //String host = "localhost";//"dendro01.dans.knaw.nl"; //int port = 8082;//80; //String context = "solr-example/select"; String baseURLString = protocol + "://" + host + ":" + port + "/" + context; String responseString = ""; URL solrSearch; //BufferedReader in = null; Scanner in = null; StringBuilder response = new StringBuilder(); try { // Note: the query string should be url encoded? 
String requestUrlString = baseURLString + "/?q=" + query + "&version=2.2&start=" + offset + "&rows=" + limit + "&indent=on"; logger.info("Solr search request: " + requestUrlString); solrSearch = new URL(requestUrlString); URLConnection fs = solrSearch.openConnection(); in = new Scanner(fs.getInputStream()); while (in.hasNextLine()) { response.append(in.nextLine()); response.append("\n"); } } catch (MalformedURLException e) { // this really should not happen, the url is coded throw new RuntimeException(e); } catch (IOException e) { //throw e; throw new SearchServiceException(e); } if (in != null) in.close(); responseString = response.toString(); // show response //logger.info("Response: \n" + response.toString()); // parse the xml string // make this into separate function Document domDoc; try { domDoc = DocumentHelper.parseText(responseString); Element xmlResponse = domDoc.getRootElement(); // get result element Element xmlResult = xmlResponse.element("result"); // for all doc elements for (Iterator<?> i = xmlResult.elementIterator("doc"); i.hasNext();) { Element docElement = (Element) i.next(); // get str element with attribute name="PID" //Node node = docElement.selectSingleNode( "str[@name='PID']" ); // tridas objects instead of projects; // using another ID and not the fedora object PID Node node = docElement.selectSingleNode("str[@name='ID']"); logger.info("Found: " + node.getText()); result.add(node.getText()); } } catch (DocumentException e) { //throw e; throw new SearchServiceException(e); } return result; }
From source file:nl.knaw.dans.dccd.application.services.DccdSearchService.java
License:Apache License
public DccdSearchResult search(String query, int offset, int limit) throws SearchServiceException { DccdSearchResult result = new DccdSearchResult(); // max results should be bigger than 0 if (limit < 1) throw new IllegalArgumentException("limit must be 1 or bigger"); if (offset < 0) throw new IllegalArgumentException("offset must be 0 or bigger"); List<String> resultIds = new ArrayList<String>(); // empty list Properties settings = DccdConfigurationService.getService().getSettings(); final String protocol = settings.getProperty("solr.protocol"); final String host = settings.getProperty("solr.host"); final int port = Integer.parseInt(settings.getProperty("solr.port")); final String context = settings.getProperty("solr.context"); // use Solr to find project with the given query // the REST interface? //final int limit = 100; //String protocol = "http"; //String host = "localhost";//"dendro01.dans.knaw.nl"; //int port = 8082;//80; //String context = "solr-example/select"; String baseURLString = protocol + "://" + host + ":" + port + "/" + context; String responseString = ""; URL solrSearch;/*from w ww . j a v a 2 s. c om*/ //BufferedReader in = null; Scanner in = null; StringBuilder response = new StringBuilder(); URLConnection fs = null; try { // Note: the query string should be url encoded? String requestUrlString = baseURLString + "/?q=" + query + "&version=2.2&start=" + offset + "&rows=" + limit + "&indent=on" + "&debugQuery=true"; // debugging on TODO: remove in production! //"/?q=" + query + "&q.op=AND&version=2.2&start=" + offset + "&rows=" + limit + "&indent=on"; // TODO: // What can we expect; HTTP response code: 400? 
// - why does this give an IO exception on openConnection logger.info("Solr search request: " + requestUrlString); solrSearch = new URL(requestUrlString); fs = solrSearch.openConnection(); in = new Scanner(fs.getInputStream()); while (in.hasNextLine()) { response.append(in.nextLine()); response.append("\n"); } } catch (MalformedURLException e) { // this really should not happen, the url is coded logger.debug("Malformed URL Exception while requesting Solr search"); throw new RuntimeException(e); } catch (IOException e) { // Try to get an idea of what happened here... // HTTP response (errors can give indication about what went wrong) // get the error stream... if ((fs instanceof HttpURLConnection)) { try { String responseMsg = ((HttpURLConnection) fs).getResponseMessage(); response.append(responseMsg); // set this as the result //result.setResponseString(response.toString()); } catch (IOException e1) { // ignore logger.debug("Unable to get response message about IO exception"); //e1.printStackTrace(); } /* html page with the response InputStream err = ((HttpURLConnection)fs).getErrorStream(); if (err != null) { in = new Scanner(err); while (in.hasNextLine()) { response.append(in.nextLine()); response.append("\n"); } } */ } logger.debug("IO exception while reading Solr response: " + response); throw new SearchServiceException(response.toString(), e); } if (in != null) in.close(); responseString = response.toString(); // show response //logger.info("Response: \n" + responseString); // keep response for testing/debugging purposes result.setResponseString(responseString); // parse the xml string // note: maybe make this into separate function int numFound = 0; Document domDoc; try { domDoc = DocumentHelper.parseText(responseString); Element xmlResponse = domDoc.getRootElement(); // get result element Element xmlResult = xmlResponse.element("result"); //numFound //<result name="response" numFound="3" start="0"> numFound = 
Integer.parseInt(xmlResult.attribute("numFound").getText(), 10); logger.info("Total hits: " + numFound); // for all doc elements for (Iterator<?> i = xmlResult.elementIterator("doc"); i.hasNext();) { Element docElement = (Element) i.next(); // get str element with attribute name="PID" //Node node = docElement.selectSingleNode( "str[@name='PID']" ); // tridas objects instead of projects; // using another ID and not the fedora object PID Node node = docElement.selectSingleNode("str[@name='ID']"); if (node == null) { logger.warn("Found doc element without <str name=\"ID\" > subelement"); } else { logger.info("Found: " + node.getText()); resultIds.add(node.getText()); } } } catch (DocumentException e) { //throw e; logger.debug("Document Exception while parsing xml response from Solr: " + responseString); throw new SearchServiceException(e); } // update results result.setResultItemIds(resultIds); result.setNumFound(numFound); return result; }
From source file:nl.knaw.dans.dccd.model.entities.AbstractEntity.java
License:Apache License
public Document asDocument() throws XMLSerializationException { String xmlString = asXMLString(); Document xmlDocument;//ww w . j ava2 s . co m try { xmlDocument = DocumentHelper.parseText(xmlString); } catch (DocumentException e) { throw new XMLSerializationException(e); } return xmlDocument; }
From source file:nl.knaw.dans.dccd.model.entities.AbstractEntity.java
License:Apache License
/**
 * Serializes this entity with {@code asXMLString()}, parses the result and
 * returns the root element of the parsed document.
 *
 * @return the root element of the parsed document
 * @throws XMLSerializationException if the serialized XML cannot be parsed
 */
public Element asElement() throws XMLSerializationException {
    final String serialized = asXMLString();
    try {
        return DocumentHelper.parseText(serialized).getRootElement();
    } catch (DocumentException parseFailure) {
        throw new XMLSerializationException(parseFailure);
    }
}
From source file:nl.knaw.dans.dccd.repository.fedora.FedoraRepositoryService.java
License:Apache License
/** Creates the list with projects specified by the given xml string * The project only have the id and title set and no further data * Create empty list if nothing found/* www .ja va 2 s . c o m*/ * * @param xmlResultString * @return * @throws DocumentException If the input string is incorrect parsing fails */ @SuppressWarnings("unchecked") private Collection<Project> createProjectListFromXMLResultString(String xmlResultString) throws DocumentException { if (xmlResultString == null || xmlResultString.length() == 0) throw new IllegalArgumentException("string must be specified"); // should be xml from fedora, parse it with dom4j // and fill the list with projects Collection<Project> dendroProjects = new ArrayList<Project>(); Document domDoc; try { domDoc = DocumentHelper.parseText(xmlResultString);//response.toString()); Element xmlResult = domDoc.getRootElement(); Element xmlResultList = xmlResult.element("resultList"); // logger.info("root: " + xmlResultList.asXML()); // get all objectFields for (Iterator i = xmlResultList.elementIterator("objectFields"); i.hasNext();) { Element objectFields = (Element) i.next(); // get id String id = objectFields.elementText("pid"); // get titel String title = objectFields.elementText("title"); logger.info("Found Id: " + id + " Title: " + title); Project dendroProject = new Project(id, title); dendroProjects.add(dendroProject); } } catch (DocumentException e) { throw e; } return dendroProjects; }
From source file:nl.knaw.dans.dccd.repository.fedora.FedoraRepositoryService.java
License:Apache License
/** Get the entityTree for the project, other datastreams are ignored * * note: Seems not much more efficient than retrieving the complete project * because it retrieves and unmarshall's the complete foxml * although it doesn't convert all the tridas datastreams with JAXB * * @param project The project for which the entityTree is retrieved *//*from w w w . ja va2 s. c o m*/ public void retrieveEntityTree(Project project) throws DccdRepositoryException { if (project == null) throw new IllegalArgumentException("project must be specified"); if (project.getSid() == null || project.getSid().length() == 0) throw new IllegalArgumentException("project must have an id specified"); String id = project.getSid(); // get the tree stream MIMETypedStream stream = null; try { stream = datastreamAccessor.getDatastreamDissemination(id, EntityTree.ENTITYTREE_ID, null); } catch (RepositoryException e) { throw new DccdRepositoryException(e); } String xmlStr = ""; try { xmlStr = new String(stream.getStream(), "UTF-8"); } catch (UnsupportedEncodingException e) { throw new DccdRepositoryException(e); } Document documentDom = null; try { documentDom = DocumentHelper.parseText(xmlStr); } catch (DocumentException e) { logger.error("dom4j exception"); throw new DccdRepositoryException(e); } Element treeElement = documentDom.getRootElement(); // show tree System.out.println("\n--- Begin tree struct ---"); System.out.print(treeElement.asXML()); System.out.println("\n--- End tree struct ---"); // create the entity tree // use the tree from the project EntityTree entityTree = project.entityTree; //entityTree.buildTree(treeElement); entityTree.setProjectEntity(EntityTreeBuilder.buildTree(treeElement)); }
From source file:nl.knaw.dans.dccd.repository.fedora.FedoraRepositoryService.java
License:Apache License
/** Ingest given project data; store it * * @param project//ww w . ja va 2 s .c o m * @throws RepositoryException */ public void ingest(Project project) throws DccdRepositoryException {//throws RepositoryException { if (project == null) throw new IllegalArgumentException("project must be specified"); if (!project.hasTridas()) throw new IllegalArgumentException("project must have tridas data"); // create the entity tree, now use the tree from the project // note: recreate whole tree no matter what is already there! EntityTree entityTree = project.entityTree; entityTree.buildTree(project.getTridas()); // create a new Fedora digital object for this dendro project and // have Fedora generate an unique Id for us DigitalObject dob = new DigitalObject(DobState.Active, DO_ID_NS); // properties dob.setLabel(entityTree.getProjectEntity().getTitle()); // use title from Project dob.setOwnerId("testDepositorId"); // This should be a real id someday! // Add to the Fedora DC, no need to make a separate datastream DublinCoreMetadata dcmd = new JiBXDublinCoreMetadata(); dcmd.addLanguage(project.getTridasLanguage().getLanguage()); dcmd.addTitle(entityTree.getProjectEntity().getTitle()); // now we must do it ourselves try { dob.addDatastreamVersion(dcmd); } catch (XMLSerializationException e) { logger.info("Could not serialize XML for the Dublin Core"); throw new DccdRepositoryException(e); } /* could have a separate datastream Datastream dcDatastream = new Datastream("DCM", ControlGroup.X); // there is only one! dob.putDatastream(dcDatastream); // actually add the stream to the dataobject! 
dcDatastream.setState(Datastream.State.A); DatastreamVersion dcVersion = dcDatastream.addDatastreamVersion(dcDatastream.nextVersionId(), "text/xml"); dcVersion.setLabel("dublincoremetadata"); try { dcVersion.setXmlContent(dcmd.asElement()); } catch (XMLSerializationException e) { // TODO Auto-generated catch block e.printStackTrace(); } */ // Add the entitytree structure datastream Datastream datastream = new Datastream(EntityTree.ENTITYTREE_ID, ControlGroup.X); // there is only one! dob.putDatastream(datastream); // actually add the stream to the dataobject! datastream.setState(Datastream.State.A); DatastreamVersion version = datastream.addDatastreamVersion(datastream.nextVersionId(), "text/xml"); version.setLabel("entitytreestruct"); version.setXmlContent(entityTree.getTreeStructAsDocument().getRootElement()); // get all fragments convert them to datastreams // and add them to the digital object List<Entity> entities = entityTree.getEntities(); for (Entity entity : entities) { String streamId = entity.getId(); // TESTING Stress testing: // repeat this X times, change the Id as well, otherwise Fedoro won't take it //for(int i=0; i<200; i++) { //logger.info("i = "+ i); //streamId+="-"+Integer.toString(i); datastream = new Datastream(streamId, ControlGroup.X); dob.putDatastream(datastream); // actually add the stream to the dataobject! datastream.setState(Datastream.State.A); version = datastream.addDatastreamVersion(datastream.nextVersionId(), "text/xml"); version.setLabel(entity.getLabel());//("entity"); // Note: use title? //set the uri for the xml content; our TRiDaS fragments //URI streamFormatURI; //try { // streamFormatURI = new URI("http://www.tridas.org/1.2"); // version.setFormatURI(streamFormatURI); //} catch (URISyntaxException e) { // e.printStackTrace(); //} // add entity to the datastreamversion try { String xmlString = entity.getXMLString(); // Note: maybe there is to much conversion going on here, // at the end there must be an xml string? 
Document domDoc = DocumentHelper.parseText(xmlString); Element xmlContent = domDoc.getRootElement(); // only for the Project, add a xml:lang attribute //if (entity instanceof ProjectEntity) { // xmlContent.addAttribute("xml:lang", project.getTridasLanguage().getLanguage()); //} //logger.info("dom4j XML: \n"+ xmlContent.asXML()); version.setXmlContent(xmlContent); } catch (DocumentException e) { logger.info("Could not parse XML"); throw new DccdRepositoryException(e); } } // end, for all entities //}// end TESTING // Store digital object, with all streams in it String logMessage = new String(""); try { objectManager.ingest(dob, logMessage); // project.setSid(dob.getSid()); project.setStoreId(dob.getSid()); logger.info("ingested project with sid: " + dob.getSid()); } catch (RepositoryException e) { throw new DccdRepositoryException(e); } }