Example usage for org.dom4j DocumentHelper parseText

Introduction

On this page you can find example usages of the org.dom4j DocumentHelper parseText method.

Prototype

public static Document parseText(String text) throws DocumentException

Source Link

Document

parseText parses the given text as an XML document and returns the newly created Document.

Usage

From source file:MyLibrary.DoExchangeRate.java

/**
 * Looks up an exchange rate in the Yahoo finance "allcurrencies" quote feed.
 *
 * <p>The feed is fetched over HTTP and parsed as XML. The first {@code resource}
 * element whose first {@code field} child has the text
 * {@code "USD/" + sourceCurrency} is selected, and the reciprocal of the number
 * in its second {@code field} child is returned.
 *
 * @param sourceCurrency the currency name that follows {@code "USD/"} in the feed
 *        (presumably an ISO currency code such as {@code EUR} -- confirm against the feed)
 * @return {@code 1.0 / value}, where {@code value} is the second field of the matching resource
 * @throws IllegalArgumentException if the feed contains no quote for the given currency
 * @throws IllegalStateException if the matching quote has no second field
 * @throws Exception if the HTTP request fails, the response is not well-formed XML,
 *         or the quoted value is not a parsable number
 */
@SuppressWarnings("unchecked")
public double getExchangeRateByYahooApi(String sourceCurrency) throws Exception {
    String url = "http://finance.yahoo.com/webservice/v1/symbols/allcurrencies/quote";
    String httpResult = DoHttpRequest1.doGet(url, "utf-8", 2);
    Document quoteDocument = DocumentHelper.parseText(httpResult);
    Element root = quoteDocument.getRootElement();
    List<Node> resources = root.selectNodes("//resource");

    String wantedName = "USD/" + sourceCurrency;
    for (Node resource : resources) {
        Node nameField = resource.selectSingleNode("field");
        // Resources without any field child cannot match; skip them instead of failing.
        if (nameField == null || !wantedName.equals(nameField.getText())) {
            continue;
        }
        Node valueField = resource.selectSingleNode("field[2]");
        if (valueField == null) {
            throw new IllegalStateException("Quote for " + wantedName + " has no value field");
        }
        // Only the first matching quote is used.
        return 1.00 / Double.parseDouble(valueField.getText());
    }
    throw new IllegalArgumentException("No exchange rate found for " + wantedName);
}

From source file:nl.knaw.dans.common.fedora.fox.DatastreamVersion.java

License:Apache License

/**
 * Parses the given XML text and stores its root element as this version's XML content.
 *
 * @param xmlString the XML text to parse
 * @throws DocumentException if the text cannot be parsed as XML
 */
public void setXmlContent(String xmlString) throws DocumentException {
    this.xmlContent = new XMLContent(DocumentHelper.parseText(xmlString).getRootElement());
}

From source file:nl.knaw.dans.dccd.application.services.DccdSearchService.java

License:Apache License

/**
 * Transforms the given foxml into a Solr indexing document.
 *
 * <p>The foxml is parsed with dom4j and run through the {@code dccdToSolr.xslt}
 * stylesheet, which is looked up through the context class loader of the
 * current thread.
 *
 * @param foxml the xml of the Fedora object to index; must not be null
 * @return the transformed xml document (for updating the Solr index)
 * @throws IllegalArgumentException if {@code foxml} is null
 * @throws IllegalStateException if the stylesheet cannot be found by the class loader
 * @throws SearchServiceException if the foxml cannot be parsed, or the stylesheet
 *         cannot be compiled or applied
 */
private Document transformFoxml(String foxml) throws SearchServiceException {
    if (foxml == null)
        throw new IllegalArgumentException("foxml must be specified");

    // get the xslt to transform with, specific for Solr
    final String DCCD_TO_SOLR_XSLT_FILENAME = "dccdToSolr.xslt";
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    URL xsltUrl = loader.getResource(DCCD_TO_SOLR_XSLT_FILENAME);
    if (xsltUrl == null) {
        // getResource returns null for a missing resource; fail with a clear message
        // instead of a NullPointerException further down.
        throw new IllegalStateException("Stylesheet not found on classpath: " + DCCD_TO_SOLR_XSLT_FILENAME);
    }

    logger.info("Transform foxml to indexing document using " + DCCD_TO_SOLR_XSLT_FILENAME);

    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    try {
        Document document = DocumentHelper.parseText(foxml);
        // StreamSource expects a system id in URI syntax. The complete URL is passed
        // (not only its path part) so that resources inside a jar, or with escaped
        // characters in their path, resolve correctly.
        Transformer transformer = transformerFactory.newTransformer(new StreamSource(xsltUrl.toExternalForm()));
        DocumentResult result = new DocumentResult();
        transformer.transform(new DocumentSource(document), result);
        return result.getDocument();
    } catch (DocumentException e) {
        throw new SearchServiceException(e);
    } catch (TransformerException e) {
        // also covers TransformerConfigurationException, which is a subclass
        throw new SearchServiceException(e);
    }
}

From source file:nl.knaw.dans.dccd.application.services.DccdSearchService.java

License:Apache License

/**
 * Search the repository using the Solr indexer
 * Use offset and limit for paging the results
 *
 * Note: FedoraRepositoryService.getSearchResultAsXMLString()
 * searches Fedora and does not using Solr!
 *
 * @param query The query to search for (Solr)
 * @param offset Zero base index ofsset/* w w  w.  ja  v a 2  s .  c  o  m*/
 * @param limit maximum number of resulting items to return
 * @return
 * @throws SearchServiceException
 */
public List<String> simpleSearch(String query, int offset, int limit) throws SearchServiceException {
    // max results should be bigger than 0
    if (limit < 1)
        throw new IllegalArgumentException("limit must be 1 or bigger");
    if (offset < 0)
        throw new IllegalArgumentException("offset must be 0 or bigger");

    List<String> result = new ArrayList<String>(); // empty list

    Properties settings = DccdConfigurationService.getService().getSettings();
    final String protocol = settings.getProperty("solr.protocol");
    final String host = settings.getProperty("solr.host");
    final int port = Integer.parseInt(settings.getProperty("solr.port"));
    final String context = settings.getProperty("solr.context");

    // use Solr to find project with the given query
    // the REST interface?
    //final int limit = 100;

    //String protocol = "http";
    //String host = "localhost";//"dendro01.dans.knaw.nl";
    //int port = 8082;//80;
    //String context = "solr-example/select";
    String baseURLString = protocol + "://" + host + ":" + port + "/" + context;

    String responseString = "";

    URL solrSearch;
    //BufferedReader in = null;
    Scanner in = null;
    StringBuilder response = new StringBuilder();
    try {
        // Note: the query string should be url encoded?
        String requestUrlString = baseURLString + "/?q=" + query + "&version=2.2&start=" + offset + "&rows="
                + limit + "&indent=on";

        logger.info("Solr search request: " + requestUrlString);
        solrSearch = new URL(requestUrlString);
        URLConnection fs = solrSearch.openConnection();
        in = new Scanner(fs.getInputStream());
        while (in.hasNextLine()) {
            response.append(in.nextLine());
            response.append("\n");
        }
    } catch (MalformedURLException e) {
        // this really should not happen, the url is coded
        throw new RuntimeException(e);
    } catch (IOException e) {
        //throw e;
        throw new SearchServiceException(e);
    }
    if (in != null)
        in.close();

    responseString = response.toString();
    // show response
    //logger.info("Response: \n" + response.toString());

    // parse the xml string
    // make this into separate function
    Document domDoc;
    try {
        domDoc = DocumentHelper.parseText(responseString);
        Element xmlResponse = domDoc.getRootElement();
        // get result element
        Element xmlResult = xmlResponse.element("result");
        // for all doc elements
        for (Iterator<?> i = xmlResult.elementIterator("doc"); i.hasNext();) {
            Element docElement = (Element) i.next();
            // get str element with attribute name="PID"
            //Node node = docElement.selectSingleNode( "str[@name='PID']" );
            // tridas objects instead of projects;
            // using another ID and not the fedora object PID
            Node node = docElement.selectSingleNode("str[@name='ID']");
            logger.info("Found: " + node.getText());
            result.add(node.getText());
        }
    } catch (DocumentException e) {
        //throw e;
        throw new SearchServiceException(e);
    }

    return result;
}

From source file:nl.knaw.dans.dccd.application.services.DccdSearchService.java

License:Apache License

/**
 * Searches the Solr index and returns the matching ids together with the total hit count.
 * Use offset and limit for paging the results.
 *
 * <p>The Solr location is read from the {@code solr.protocol}, {@code solr.host},
 * {@code solr.port} and {@code solr.context} settings. The raw response text is
 * kept on the returned result for testing/debugging purposes.
 *
 * @param query the (Solr) query to search for; it is appended to the request URL as is
 * @param offset zero-based index of the first result to return
 * @param limit maximum number of resulting items to return
 * @return a result holding the response string, the ids found in the
 *         {@code str[@name='ID']} nodes of the {@code doc} elements, and the value
 *         of the {@code numFound} attribute of the {@code result} element
 * @throws IllegalArgumentException if {@code limit < 1} or {@code offset < 0}
 * @throws SearchServiceException if the request fails or the response is not well-formed XML
 */
public DccdSearchResult search(String query, int offset, int limit) throws SearchServiceException {
    DccdSearchResult result = new DccdSearchResult();

    // max results should be bigger than 0
    if (limit < 1)
        throw new IllegalArgumentException("limit must be 1 or bigger");
    if (offset < 0)
        throw new IllegalArgumentException("offset must be 0 or bigger");

    List<String> resultIds = new ArrayList<String>(); // empty list

    Properties settings = DccdConfigurationService.getService().getSettings();
    final String protocol = settings.getProperty("solr.protocol");
    final String host = settings.getProperty("solr.host");
    final int port = Integer.parseInt(settings.getProperty("solr.port"));
    final String context = settings.getProperty("solr.context");

    // use the Solr REST interface to find projects with the given query
    String baseURLString = protocol + "://" + host + ":" + port + "/" + context;

    Scanner in = null;
    StringBuilder response = new StringBuilder();
    URLConnection fs = null; // declared here so the IOException handler can inspect it
    try {
        // NOTE(review): the query is not URL encoded here; confirm whether callers
        // are expected to pass an already encoded query.
        String requestUrlString = baseURLString + "/?q=" + query + "&version=2.2&start=" + offset + "&rows="
                + limit + "&indent=on" + "&debugQuery=true"; // debugging on TODO: remove in production!

        // TODO:
        // What can we expect; HTTP response code: 400?
        // - why does this give an IO exception on openConnection
        logger.info("Solr search request: " + requestUrlString);
        URL solrSearch = new URL(requestUrlString);
        fs = solrSearch.openConnection();
        // Decode explicitly as UTF-8 instead of relying on the platform default charset
        // (assumes Solr serves its XML response as UTF-8).
        in = new Scanner(fs.getInputStream(), "UTF-8");
        while (in.hasNextLine()) {
            response.append(in.nextLine());
            response.append("\n");
        }
    } catch (MalformedURLException e) {
        // this really should not happen, the url is coded
        logger.debug("Malformed URL Exception while requesting Solr search");
        throw new RuntimeException(e);
    } catch (IOException e) {
        // Try to get an idea of what happened here: the HTTP response message
        // can give an indication about what went wrong.
        if ((fs instanceof HttpURLConnection)) {
            try {
                String responseMsg = ((HttpURLConnection) fs).getResponseMessage();
                response.append(responseMsg);
            } catch (IOException e1) {
                // best effort only: the original IOException is still reported below
                logger.debug("Unable to get response message about IO exception");
            }
        }
        logger.debug("IO exception while reading Solr response: " + response);

        throw new SearchServiceException(response.toString(), e);
    } finally {
        // release the connection stream on every path, including failures
        if (in != null)
            in.close();
    }

    String responseString = response.toString();

    // keep response for testing/debugging purposes
    result.setResponseString(responseString);

    // parse the xml string
    // note: maybe make this into separate function
    int numFound = 0;
    try {
        Document domDoc = DocumentHelper.parseText(responseString);
        Element xmlResponse = domDoc.getRootElement();
        // get result element
        Element xmlResult = xmlResponse.element("result");

        // numFound, as in: <result name="response" numFound="3" start="0">
        numFound = Integer.parseInt(xmlResult.attribute("numFound").getText(), 10);
        logger.info("Total hits: " + numFound);

        // for all doc elements
        for (Iterator<?> i = xmlResult.elementIterator("doc"); i.hasNext();) {
            Element docElement = (Element) i.next();
            // tridas objects instead of projects;
            // using another ID and not the fedora object PID
            Node node = docElement.selectSingleNode("str[@name='ID']");
            if (node == null) {
                logger.warn("Found doc element without <str name=\"ID\" > subelement");
            } else {
                logger.info("Found: " + node.getText());
                resultIds.add(node.getText());
            }
        }
    } catch (DocumentException e) {
        logger.debug("Document Exception while parsing xml response from Solr: " + responseString);
        throw new SearchServiceException(e);
    }

    // update results
    result.setResultItemIds(resultIds);
    result.setNumFound(numFound);

    return result;
}

From source file:nl.knaw.dans.dccd.model.entities.AbstractEntity.java

License:Apache License

/**
 * Returns this entity as a dom4j document, obtained by parsing the result of
 * {@code asXMLString()}.
 *
 * @return the parsed document
 * @throws XMLSerializationException if the xml string cannot be parsed
 */
public Document asDocument() throws XMLSerializationException {
    final String serialized = asXMLString();
    try {
        return DocumentHelper.parseText(serialized);
    } catch (DocumentException parseFailure) {
        throw new XMLSerializationException(parseFailure);
    }
}

From source file:nl.knaw.dans.dccd.model.entities.AbstractEntity.java

License:Apache License

/**
 * Returns this entity as a dom4j element: the root element of the document
 * obtained by parsing the result of {@code asXMLString()}.
 *
 * @return the root element of the parsed document
 * @throws XMLSerializationException if the xml string cannot be parsed
 */
public Element asElement() throws XMLSerializationException {
    final String serialized = asXMLString();
    try {
        return DocumentHelper.parseText(serialized).getRootElement();
    } catch (DocumentException parseFailure) {
        throw new XMLSerializationException(parseFailure);
    }
}

From source file:nl.knaw.dans.dccd.repository.fedora.FedoraRepositoryService.java

License:Apache License

/** Creates the list with projects specified by the given xml string
 * The project only have the id and title set and no further data
 * Create empty list if nothing found/* www .ja  va  2 s .  c o m*/
 *
 * @param xmlResultString
 * @return
 * @throws DocumentException If the input string is incorrect parsing fails
 */
@SuppressWarnings("unchecked")
private Collection<Project> createProjectListFromXMLResultString(String xmlResultString)
        throws DocumentException {
    if (xmlResultString == null || xmlResultString.length() == 0)
        throw new IllegalArgumentException("string must be specified");

    // should be xml from fedora, parse it with dom4j
    // and fill the list with projects
    Collection<Project> dendroProjects = new ArrayList<Project>();
    Document domDoc;
    try {
        domDoc = DocumentHelper.parseText(xmlResultString);//response.toString());
        Element xmlResult = domDoc.getRootElement();
        Element xmlResultList = xmlResult.element("resultList");

        // logger.info("root: " + xmlResultList.asXML());

        // get all objectFields
        for (Iterator i = xmlResultList.elementIterator("objectFields"); i.hasNext();) {
            Element objectFields = (Element) i.next();
            // get id
            String id = objectFields.elementText("pid");
            // get titel
            String title = objectFields.elementText("title");
            logger.info("Found Id: " + id + " Title: " + title);
            Project dendroProject = new Project(id, title);
            dendroProjects.add(dendroProject);
        }
    } catch (DocumentException e) {
        throw e;
    }

    return dendroProjects;
}

From source file:nl.knaw.dans.dccd.repository.fedora.FedoraRepositoryService.java

License:Apache License

/**
 * Gets the entityTree for the project; other datastreams are ignored.
 *
 * <p>The datastream with id {@code EntityTree.ENTITYTREE_ID} is retrieved for the
 * project's sid, decoded as UTF-8, parsed with dom4j, and the tree built from
 * its root element is set as the project entity of the project's entity tree.
 *
 * <p>Note: seems not much more efficient than retrieving the complete project,
 * because it retrieves and unmarshalls the complete foxml,
 * although it doesn't convert all the tridas datastreams with JAXB.
 *
 * @param project the project for which the entityTree is retrieved; must have a non-empty sid
 * @throws IllegalArgumentException if the project is null or has no sid
 * @throws DccdRepositoryException if the datastream cannot be retrieved, decoded or parsed
 */
public void retrieveEntityTree(Project project) throws DccdRepositoryException {
    if (project == null)
        throw new IllegalArgumentException("project must be specified");
    if (project.getSid() == null || project.getSid().length() == 0)
        throw new IllegalArgumentException("project must have an id specified");

    String id = project.getSid();

    // get the tree stream
    MIMETypedStream stream = null;
    try {
        stream = datastreamAccessor.getDatastreamDissemination(id, EntityTree.ENTITYTREE_ID, null);
    } catch (RepositoryException e) {
        throw new DccdRepositoryException(e);
    }

    String xmlStr = "";
    try {
        xmlStr = new String(stream.getStream(), "UTF-8");
    } catch (UnsupportedEncodingException e) {
        throw new DccdRepositoryException(e);
    }

    Document documentDom = null;
    try {
        documentDom = DocumentHelper.parseText(xmlStr);
    } catch (DocumentException e) {
        logger.error("dom4j exception");
        throw new DccdRepositoryException(e);
    }
    Element treeElement = documentDom.getRootElement();

    // show tree: goes through the logger instead of standard out, and the xml is
    // only serialized when debug logging is enabled
    if (logger.isDebugEnabled()) {
        logger.debug("\n--- Begin tree struct ---\n" + treeElement.asXML() + "\n--- End tree struct ---");
    }

    // create the entity tree
    // use the tree from the project
    EntityTree entityTree = project.entityTree;
    entityTree.setProjectEntity(EntityTreeBuilder.buildTree(treeElement));
}

From source file:nl.knaw.dans.dccd.repository.fedora.FedoraRepositoryService.java

License:Apache License

/**
 * Ingests the given project data; stores it.
 *
 * <p>The project's entity tree is rebuilt from its tridas data. A new digital
 * object is then filled with Dublin Core metadata (language and title), one
 * datastream holding the entity tree structure and one datastream per entity,
 * and handed to the object manager. On success the sid of the digital object is
 * set as the store id of the project.
 *
 * @param project the project to store; must not be null and must have tridas data
 * @throws IllegalArgumentException if the project is null or has no tridas data
 * @throws DccdRepositoryException if the Dublin Core metadata cannot be serialized,
 *         the xml of an entity cannot be parsed, or the repository ingest fails
 */
public void ingest(Project project) throws DccdRepositoryException {
    if (project == null)
        throw new IllegalArgumentException("project must be specified");
    if (!project.hasTridas())
        throw new IllegalArgumentException("project must have tridas data");

    // create the entity tree, now use the tree from the project
    // note: recreate whole tree no matter what is already there!
    EntityTree entityTree = project.entityTree;
    entityTree.buildTree(project.getTridas());

    // create a new Fedora digital object for this dendro project and
    // have Fedora generate an unique Id for us
    DigitalObject dob = new DigitalObject(DobState.Active, DO_ID_NS);

    // properties
    dob.setLabel(entityTree.getProjectEntity().getTitle()); // use title from Project
    dob.setOwnerId("testDepositorId"); // This should be a real id someday!

    // Add to the Fedora DC, no need to make a separate datastream
    DublinCoreMetadata dcmd = new JiBXDublinCoreMetadata();
    dcmd.addLanguage(project.getTridasLanguage().getLanguage());
    dcmd.addTitle(entityTree.getProjectEntity().getTitle()); // now we must do it ourselves
    try {
        dob.addDatastreamVersion(dcmd);
    } catch (XMLSerializationException e) {
        logger.error("Could not serialize XML for the Dublin Core");
        throw new DccdRepositoryException(e);
    }

    // Add the entitytree structure datastream
    Datastream datastream = new Datastream(EntityTree.ENTITYTREE_ID, ControlGroup.X); // there is only one!
    dob.putDatastream(datastream); // actually add the stream to the dataobject!
    datastream.setState(Datastream.State.A);
    DatastreamVersion version = datastream.addDatastreamVersion(datastream.nextVersionId(), "text/xml");
    version.setLabel("entitytreestruct");
    version.setXmlContent(entityTree.getTreeStructAsDocument().getRootElement());

    // get all fragments convert them to datastreams
    // and add them to the digital object
    List<Entity> entities = entityTree.getEntities();
    for (Entity entity : entities) {
        String streamId = entity.getId();
        datastream = new Datastream(streamId, ControlGroup.X);
        dob.putDatastream(datastream); // actually add the stream to the dataobject!
        datastream.setState(Datastream.State.A);
        version = datastream.addDatastreamVersion(datastream.nextVersionId(), "text/xml");
        version.setLabel(entity.getLabel()); // Note: use title?

        // add entity to the datastreamversion
        try {
            String xmlString = entity.getXMLString();
            // Note: maybe there is too much conversion going on here,
            // at the end there must be an xml string?
            Document domDoc = DocumentHelper.parseText(xmlString);
            Element xmlContent = domDoc.getRootElement();
            version.setXmlContent(xmlContent);
        } catch (DocumentException e) {
            logger.error("Could not parse XML");
            throw new DccdRepositoryException(e);
        }
    } // end, for all entities

    // Store digital object, with all streams in it
    String logMessage = "";
    try {
        objectManager.ingest(dob, logMessage);
        project.setStoreId(dob.getSid());
        logger.info("ingested project with sid: " + dob.getSid());
    } catch (RepositoryException e) {
        throw new DccdRepositoryException(e);
    }

}