Example usage for org.jsoup.nodes Element html

Introduction

This page collects example usages of the org.jsoup.nodes.Element.html() method.

Prototype

public String html()

Source Link

Document

Retrieves the element's inner HTML.

Usage

From source file:org.jtotus.database.NetworkOP.java

/**
 * Fetches one value for a stock on a given date from the page addressed by
 * {@code buildRequest(date, stockName)}.
 *
 * <p>The page's {@code td} cells are scanned in document order. The first cell
 * whose inner HTML contains the formatted date marks the row; the value is read
 * from the cell {@code col} positions after it. Commas in the cell text are
 * replaced by dots before parsing.
 *
 * @param stockName name of the stock to look up
 * @param date      date whose row is searched for
 * @param col       number of cells to skip forward from the date cell
 * @return the parsed value, or {@code null} if the page could not be fetched,
 *         the date was not found, the row has fewer than {@code col} following
 *         cells, or the cell text is not a number
 */
public BigDecimal fetchData(String stockName, DateTime date, int col) {
    System.out.printf("NetworkOP fetchData(%s,hex:%s, date:%s col:%d)\n", stockName,
            new StockType(stockName).getHexName(), date.toString(), col);

    // The formatted date is the same for every cell, so compute it once.
    String datePattern = dateFormatter.print(date);

    try {
        URL url = new URL(this.buildRequest(date, stockName));
        Document doc = Jsoup.parse(url, 2 * 1000);

        Iterator<Element> iter = doc.select("td").iterator();
        while (iter.hasNext()) {
            Element elem = iter.next();
            if (elem.html().indexOf(datePattern) == -1) {
                continue;
            }

            // Step forward to the requested column; a short row must not
            // blow up with NoSuchElementException.
            for (int i = 0; i < col; i++) {
                if (!iter.hasNext()) {
                    System.out.printf("NetworkOP: column %d not available for date:%s\n", col, datePattern);
                    return null;
                }
                elem = iter.next();
            }

            String fdata = elem.text().replace(',', '.');

            if (debug) {
                System.out.printf("Fetched value from OP bank ->:%s for date:%s\n", fdata, datePattern);
            }

            try {
                return BigDecimal.valueOf(Double.valueOf(fdata).doubleValue());
            } catch (NumberFormatException ex) {
                // A non-numeric cell is reported as "no data", like the other failures.
                System.out.printf("NetworkOP: unparsable value '%s' for date:%s\n", fdata, datePattern);
                return null;
            }
        }

    } catch (IOException ex) {
        System.out.printf("Failed in :%s\n", "NetworkOP");
    }

    return null;
}

From source file:org.jtotus.database.NetworkOP.java

/**
 * Fetches the values for a stock over a date range from the page addressed by
 * {@code buildRequest(fromDate, toDate, stockName)}.
 *
 * <p>For every date produced by the {@code DateIterator} the page's {@code td}
 * cells are scanned from the start. The first cell whose inner HTML contains
 * the formatted date marks the row, and the value is read from the cell
 * {@code col} positions after it. Dates with no matching cell, with too few
 * following cells, or with a non-numeric value contribute nothing to the
 * result, so the returned array may be shorter than the number of dates.
 *
 * @param stockName name of the stock to look up
 * @param fromDate  first date of the period
 * @param toDate    last date of the period
 * @param col       number of cells to skip forward from the date cell
 * @return the values that could be read, in date-iteration order; empty if the
 *         page could not be fetched
 */
private double[] fetchDataPeriod(String stockName, DateTime fromDate, DateTime toDate, int col) {
    List<Double> values = new ArrayList<Double>();

    System.out.printf("NetworkOP fetchData(%s,hex:%s, date:%s-%s col:%d)\n", stockName,
            new StockType(stockName).getHexName(), fromDate.toString(), toDate.toString(), col);

    try {
        URL url = new URL(this.buildRequest(fromDate, toDate, stockName));
        Document doc = Jsoup.parse(url, 2 * 1000);
        Elements elems = doc.select("td");

        DateIterator dateIter = new DateIterator(fromDate, toDate);
        while (dateIter.hasNext()) {
            Iterator<Element> iter = elems.iterator();
            String datePattern = dateFormatter.print(dateIter.nextInCalendar());

            while (iter.hasNext()) {
                Element elem = iter.next();
                if (elem.html().indexOf(datePattern) == -1) {
                    continue;
                }

                // Step forward to the requested column; a short row must not
                // blow up with NoSuchElementException.
                boolean columnReached = true;
                for (int i = 0; i < col; i++) {
                    if (!iter.hasNext()) {
                        columnReached = false;
                        break;
                    }
                    elem = iter.next();
                }
                if (!columnReached) {
                    System.out.printf("NetworkOP: column %d not available for date:%s\n", col, datePattern);
                    break;
                }

                String fdata = elem.text().replace(',', '.');

                if (debug) {
                    System.out.printf("Fetched value from OP bank ->:%s for date:%s\n", fdata, datePattern);
                }

                try {
                    values.add(Double.valueOf(fdata));
                } catch (NumberFormatException ex) {
                    // One malformed cell should not abort the whole period.
                    System.out.printf("NetworkOP: unparsable value '%s' for date:%s\n", fdata, datePattern);
                }
                // Only the first matching row per date is used.
                break;
            }
        }

    } catch (IOException ex) {
        System.out.printf("Failed in :%s\n", "NetworkOP");
    }

    return ArrayUtils.toPrimitive(values.toArray(new Double[0]));
}

From source file:org.mar9000.space2latex.WikiPage.java

/**
 * Builds a {@link WikiPage} from a previously saved file.
 *
 * <p>The file content is parsed as an HTML body fragment. The first
 * {@code page} element supplies the title and id attributes, its first
 * {@code content} child supplies the page body, and every {@code wikiimage}
 * under the first {@code wikiimages} element is registered in the page's
 * image map, keyed by the outer HTML of its first {@code ac|image} element.
 *
 * @param file the file to read
 * @return the populated page
 * @throws IOException declared by this method; presumably raised when the file
 *         cannot be read
 */
public static WikiPage loadForFormat(File file) throws IOException {
    Document parsed = Jsoup.parseBodyFragment(IOUtils.readFileAsString(file));
    // Pretty-printing is switched off so the input string is maintained.
    parsed.outputSettings().prettyPrint(false);
    Element root = parsed.body();

    Element pageNode = root.select("page").first();
    String pageTitle = pageNode.attr("title");
    String pageId = pageNode.attr("id");
    Element contentNode = pageNode.select("content").first();

    WikiPage result = new WikiPage(null, pageTitle, pageId, contentNode.html());
    result.pageContent = contentNode;

    // Images.
    Element imageContainer = root.select("wikiimages").first();
    for (Element imageNode : imageContainer.select("wikiimage")) {
        WikiImage wikiImage = new WikiImage();
        String key = imageNode.select("ac|image").first().outerHtml();
        wikiImage.filename = imageNode.attr("pageid") + "/" + imageNode.attr("filename");
        result.images.put(key, wikiImage);
    }
    return result;
}

From source file:org.niord.core.publication.PublicationUtils.java

/**
 * Extracts the given message publication from the message.
 *
 * <p>The message's publication HTML (internal or regular, depending on the
 * publication type) is searched for the first {@code a} or {@code span}
 * element carrying a {@code publication} attribute equal to the publication
 * id. The link is taken from the element's {@code href} when it differs from
 * the publication's own link, and the parameters are whatever the element
 * text holds in place of {@code ${parameters}} in the publication's message
 * format.
 *
 * @param message the message
 * @param publication the publication to extract
 * @param lang the language
 * @return the message publication or null if not found
 */
public static MessagePublicationVo extractMessagePublication(MessageVo message, SystemPublicationVo publication,
        String lang) {
    // Sanity check
    if (message == null || publication == null) {
        return null;
    }
    if (publication.getDesc(lang) == null || message.getDesc(lang) == null) {
        return null;
    }

    boolean internal = publication.getMessagePublication() == MessagePublication.INTERNAL;
    String pubHtml = internal ? message.getDesc(lang).getInternalPublication()
            : message.getDesc(lang).getPublication();
    if (StringUtils.isBlank(pubHtml)) {
        return null;
    }

    PublicationDescVo pubDesc = publication.getDesc(lang);

    Document doc = Jsoup.parseBodyFragment(pubHtml);

    String pubAttr = "[publication=" + publication.getPublicationId() + "]";
    Element e = doc.select("a" + pubAttr + ",span" + pubAttr).first();
    if (e == null) {
        return null;
    }

    MessagePublicationVo msgPub = new MessagePublicationVo();
    msgPub.setPublication(publication);

    // Only record a link that differs from the publication's default link.
    String link = e.attr("href");
    if (StringUtils.isNotBlank(link) && pubDesc != null && !Objects.equals(link, pubDesc.getLink())) {
        msgPub.setLink(link);
    }

    String text = TextUtils.removeTrailingDot(e.html());

    // Internal publications have brackets around them
    if (internal && text.startsWith("[") && text.endsWith("]")) {
        text = text.substring(1, text.length() - 1);
    }

    final String placeholder = "${parameters}";
    String format = pubDesc != null ? pubDesc.getMessagePublicationFormat() : null;
    if (StringUtils.isNotBlank(text) && StringUtils.isNotBlank(format)) {
        int index = format.indexOf(placeholder);
        if (index >= 0) {
            String prefix = format.substring(0, index);
            String suffix = format.substring(index + placeholder.length());
            // The length guard rejects texts where prefix and suffix overlap
            // (e.g. format "'${parameters}'" and text "'"), which would
            // otherwise make substring() throw.
            if (text.length() >= prefix.length() + suffix.length()
                    && text.startsWith(prefix) && text.endsWith(suffix)) {
                msgPub.setParameters(text.substring(prefix.length(), text.length() - suffix.length()));
            }
        }
    }

    return msgPub;
}

From source file:org.norvelle.addressdiscoverer.parse.structured.StructuredPageEmailContactLink.java

/**
 * Looks for an email address in the element's inner HTML first (so that
 * element attributes are taken into account) and, if none is found there,
 * in the element's plain text (in case the HTML has been scrambled to
 * obfuscate the address).
 *
 * @param element the element to search
 * @throws DoesNotContainContactLinkException if neither the HTML nor the text yields a link
 * @throws MultipleContactLinksOfSameTypeFoundException propagated from the link search
 */
public StructuredPageEmailContactLink(Element element)
        throws DoesNotContainContactLinkException, MultipleContactLinksOfSameTypeFoundException {
    super(element);
    String markup = element.html();
    String found;
    try {
        found = this.findLinkInString(markup);
    } catch (DoesNotContainContactLinkException notInMarkup) {
        // Fall back to the rendered text; a failure here propagates to the caller.
        found = this.findLinkInString(element.text());
    }
    this.address = found;
}

From source file:org.norvelle.addressdiscoverer.parse.structured.StructuredPageWebContactLink.java

/**
 * Fetches the web page specified by the contact weblink and extracts
 * an email from it. Note that we fetch the first such email found
 * and discard others.
 *
 * <p>The page is decoded with the charset named in the response's
 * Content-Type header when that charset is supported, and with UTF-8
 * otherwise. The response stream is always closed.
 *
 * @return the email address found in the body of the linked page
 * @throws org.norvelle.addressdiscoverer.exceptions.DoesNotContainContactLinkException
 *         if the link is a {@code javascript:} link, the page cannot be
 *         fetched, or no email is found in it
 */
public String fetchEmailFromWeblink() throws DoesNotContainContactLinkException {
    if (this.address.startsWith("javascript:")) {
        throw new DoesNotContainContactLinkException();
    }

    // Try to fetch the webpage linked to
    String body;
    try {
        String addr = StructuredPageContactLinkLocator.resolveAddress(this.address);
        URL u = new URL(addr);
        u.toURI(); // called only for validation: throws URISyntaxException on a malformed address
        URLConnection con = u.openConnection();
        try (InputStream in = con.getInputStream()) {
            // getContentEncoding() is the Content-Encoding header (e.g. "gzip"),
            // not a charset; the charset is a parameter of Content-Type.
            String html = IOUtils.toString(in, charsetFromContentType(con.getContentType()));
            Document soup = Jsoup.parse(html);
            Element bodyElement = soup.select("body").first();
            body = bodyElement.html();
        }
    } catch (URISyntaxException | IOException ex) {
        throw new DoesNotContainContactLinkException();
    }

    // Now, extract the email if we can.
    String matchFound = this.findEmail(body);
    if (matchFound.isEmpty()) {
        throw new DoesNotContainContactLinkException();
    }
    return matchFound;
}

/**
 * Returns the charset named by the {@code charset=} parameter of a
 * Content-Type header value, or "UTF-8" when the header is absent, has no
 * such parameter, or names a charset this JVM does not support.
 */
private static String charsetFromContentType(String contentType) {
    final String key = "charset=";
    if (contentType != null) {
        for (String part : contentType.split(";")) {
            String param = part.trim();
            if (!param.regionMatches(true, 0, key, 0, key.length())) {
                continue;
            }
            String name = param.substring(key.length()).trim();
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                if (java.nio.charset.Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name: fall through to the default.
            }
        }
    }
    return "UTF-8";
}

From source file:org.openhab.tools.analysis.checkstyle.AboutHtmlCheck.java

/**
 * Logs a violation unless the processed about.html document contains a
 * paragraph whose content matches the license paragraph of the valid
 * about.html file.
 *
 * @param processedAboutHtmlFileDocument the parsed about.html under inspection
 */
private void checkLicenseParagraph(Document processedAboutHtmlFileDocument) {
    // the paragraph with index 1 in the valid about.html file
    // is the license paragraph
    String expectedLicenseContent = Jsoup.parse(validAboutHtmlFileContent)
            .getElementsByTag(PARAGRAPH_TAG)
            .get(1)
            .html();
    Elements candidateParagraphs = processedAboutHtmlFileDocument.getElementsByTag(PARAGRAPH_TAG);

    boolean licenseFound = isElementProvided(candidateParagraphs, expectedLicenseContent);
    if (licenseFound) {
        return;
    }
    log(0, "Invalid or missing license paragraph in the about.html file. " + VALID_ABOUT_HTML_FILE_LINK_MSG
            + validAboutHtmlFileURL);
}

From source file:org.openhab.tools.analysis.checkstyle.AboutHtmlCheck.java

/**
 * Tells whether any of the given elements has inner HTML equal to the
 * searched content, ignoring all whitespace on both sides.
 *
 * @param elements        the candidate elements
 * @param searchedElement the content to look for
 * @return {@code true} if at least one element matches, {@code false} otherwise
 */
private boolean isElementProvided(Elements elements, String searchedElement) {
    if (elements.isEmpty()) {
        return false;
    }
    // The searched content is the same for every candidate: normalize it once
    // instead of re-running the regex on each iteration.
    String normalizedSearched = searchedElement.replaceAll("\\s", "");
    for (Element element : elements) {
        if (element.html().replaceAll("\\s", "").equals(normalizedSearched)) {
            return true;
        }
    }
    return false;
}

From source file:org.opens.rules.doc.utils.exportdomtocsv.ExportDomToCsv.java

/**
 * Before using it please set the FOLDER variable with the path where you
 * want to create your csv file./*from   ww  w . ja  v a 2  s.co  m*/
 *
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    File ref = FileUtils.getFile(FOLDER);
    JsoupFunc jsf = new JsoupFunc();
    Document doc = jsf.getDocument();
    Elements thematiques = doc.select("div.thematique");
    StringBuilder sb = new StringBuilder();
    String testCode = "";
    String testLabel = "";
    String critere = "";
    for (int i = 2; i < thematiques.size(); i++) {
        String themeIndex = String.valueOf(i - 1) + "";
        String theme = (thematiques.get(i).child(0).text() + "");
        Elements criteres = thematiques.get(i).select("h3");
        for (int j = 1; j < criteres.size(); j++) {
            Element critereLevel = criteres.get(j);
            String critereH3String = critereLevel.toString();
            String level = critereH3String.substring(critereH3String.indexOf("[") + 1,
                    critereH3String.indexOf("]")) + "";
            Elements tests = criteres.get(j).nextElementSibling().select("[id^=test-]");
            try {
                critere = criteres.get(j).id().substring(5, 10) + "";
            } catch (StringIndexOutOfBoundsException sioobe) {
                try {
                    critere = criteres.get(j).id().substring(5, 9) + "";
                } catch (StringIndexOutOfBoundsException sioobe2) {
                    critere = criteres.get(j).id().substring(5, 8) + "";
                }
            }
            String[] critereArray = criteres.get(j).text().split("] ");
            String critereLabel = critereArray[1].toString() + "";
            for (Element el : tests) {
                Pattern digitPattern = Pattern.compile("\\d+\\.\\d+\\.\\d+\\s?\\:?\\s?");
                Matcher matcher = digitPattern.matcher(el.text());
                if (matcher.find()) {
                    String testLabelReplace = el.html()
                            .replace("index.php", "http://www.accessiweb.org/index.php").replace("\n", "");
                    testLabel = testLabelReplace.substring(matcher.end(), testLabelReplace.length()) + "";
                }
                try {
                    testCode = el.id().substring(5, 12) + "";
                } catch (StringIndexOutOfBoundsException sioobe) {
                    try {
                        testCode = (el.id().substring(5, 11) + "");
                    } catch (StringIndexOutOfBoundsException sioobe3) {
                        testCode = (el.id().substring(5, 10) + "");
                    }
                }
                sb.append(themeIndex + theme + critere + critereLabel + testCode + testLabel + level + "\n");
            }
        }
    }
    FileUtils.writeStringToFile(ref, sb.toString());
}

From source file:org.sakaiproject.nakamura.files.migrator.PageMigrator.java

/**
 * Finishes the current row and hands back a fresh empty one.
 *
 * <p>If the HTML element is not empty, its inner HTML is first added to the
 * row as an "htmlblock" cell. The row is then appended to the page's "rows"
 * when at least one of its columns has elements and the very same row object
 * is not already among the page's rows.
 *
 * @param row               the row being completed
 * @param page              the page the row belongs to
 * @param columnsForNextRow column count for the new row; values below 1 are treated as 1
 * @param htmlElement       pending HTML content for the row
 * @return a newly generated empty row
 * @throws JSONException if the JSON structure cannot be read or updated
 */
protected JSONObject addRowToPage(JSONObject row, JSONObject page, int columnsForNextRow, Element htmlElement)
        throws JSONException {
    if (!isEmpty(htmlElement)) {
        generateNewCell(null, "htmlblock", page, row, 0, generateHtmlBlock(htmlElement.html()));
    }

    // Does any column of the row hold at least one element?
    boolean rowHasContent = false;
    int columnIndex = 0;
    while (!rowHasContent && columnIndex < row.getJSONArray("columns").length()) {
        rowHasContent = row.getJSONArray("columns").getJSONObject(columnIndex)
                .getJSONArray("elements").length() > 0;
        columnIndex++;
    }

    // Identity comparison on purpose: only the same row object counts as present.
    boolean rowAlreadyPresent = false;
    for (int k = 0; !rowAlreadyPresent && k < page.getJSONArray("rows").length(); k++) {
        rowAlreadyPresent = page.getJSONArray("rows").getJSONObject(k) == row;
    }

    if (rowHasContent && !rowAlreadyPresent) {
        page.accumulate("rows", row);
    }

    int nextRowColumns = Math.max(columnsForNextRow, 1);
    return generateEmptyRow(nextRowColumns);
}