List of usage examples for org.jsoup.nodes.Element.html()
public String html()
From source file:org.jtotus.database.NetworkOP.java
public BigDecimal fetchData(String stockName, DateTime date, int col) { BigDecimal result = null;/* ww w .j a v a2 s .co m*/ URL url; System.out.printf("NetworkOP fetchData(%s,hex:%s, date:%s col:%d)\n", stockName, new StockType(stockName).getHexName(), date.toString(), col); try { url = new URL(this.buildRequest(date, stockName)); Document doc = Jsoup.parse(url, 2 * 1000); Elements elems = doc.select("td"); Iterator<Element> iter = elems.iterator(); while (iter.hasNext()) { Element elem = iter.next(); String data = elem.html(); String datePattern = dateFormatter.print(date); //String formatHttp = "<div class=\"Ensimmainen\">\n" + datePattern + "\n</div>"; if (data.indexOf(datePattern) != -1) { for (int i = 0; i < col; i++) { elem = iter.next(); } data = elem.text(); String fdata = data.replace(',', '.'); if (debug) { System.out.printf("Fetched value from OP bank ->:%s for date:%s\n", fdata, datePattern); } return BigDecimal.valueOf(Double.valueOf(fdata).doubleValue()); } } } catch (IOException ex) { System.out.printf("Failed in :%s\n", "NetworkOP"); //Logger.getLogger(NetworkGoogle.class.getName()).log(Level.SEVERE, null, ex); } return result; }
From source file:org.jtotus.database.NetworkOP.java
private double[] fetchDataPeriod(String stockName, DateTime fromDate, DateTime toDate, int col) { List<Double> values = new ArrayList<Double>(); URL url;// w w w . j a va2 s.c o m System.out.printf("NetworkOP fetchData(%s,hex:%s, date:%s-%s col:%d)\n", stockName, new StockType(stockName).getHexName(), fromDate.toString(), toDate.toString(), col); try { url = new URL(this.buildRequest(fromDate, toDate, stockName)); Document doc = Jsoup.parse(url, 2 * 1000); Elements elems = doc.select("td"); DateIterator dateIter = new DateIterator(fromDate, toDate); while (dateIter.hasNext()) { Iterator<Element> iter = elems.iterator(); String datePattern = dateFormatter.print(dateIter.nextInCalendar()); while (iter.hasNext()) { Element elem = iter.next(); String data = elem.html(); //System.out.printf("Fetching.. :%s\n", dateFormatter.print(dateIter.getCurrentAsCalendar())); //String formatHttp = "<div class=\"Ensimmainen\">\n" + datePattern + "\n</div>"; if (data.indexOf(datePattern) != -1) { for (int i = 0; i < col; i++) { elem = iter.next(); } data = elem.text(); String fdata = data.replace(',', '.'); if (debug) { System.out.printf("Fetched value from OP bank ->:%s for date:%s\n", fdata, datePattern); } values.add(Double.valueOf(fdata)); break; } } } } catch (IOException ex) { System.out.printf("Failed in :%s\n", "NetworkOP"); //Logger.getLogger(NetworkGoogle.class.getName()).log(Level.SEVERE, null, ex); } return ArrayUtils.toPrimitive(values.toArray(new Double[0])); }
From source file:org.mar9000.space2latex.WikiPage.java
public static WikiPage loadForFormat(File file) throws IOException { String fileContent = IOUtils.readFileAsString(file); Document doc = Jsoup.parseBodyFragment(fileContent); // Maintain input string. doc.outputSettings().prettyPrint(false); Element body = doc.body();/*from w w w. jav a 2 s . co m*/ Element pageElement = body.select("page").first(); String title = pageElement.attr("title"); String id = pageElement.attr("id"); Element pageContent = pageElement.select("content").first(); WikiPage page = new WikiPage(null, title, id, pageContent.html()); page.pageContent = pageContent; // Images. Elements images = body.select("wikiimages").first().select("wikiimage"); for (Element imageElement : images) { WikiImage image = new WikiImage(); String acKey = imageElement.select("ac|image").first().outerHtml(); image.filename = imageElement.attr("pageid") + "/" + imageElement.attr("filename"); page.images.put(acKey, image); } return page; }
From source file:org.niord.core.publication.PublicationUtils.java
/** * Extracts the given message publication from the message * * @param message the message//w ww . j ava 2 s . com * @param publication the publication to extract * @param lang the language * @return the message publication or null if not found */ public static MessagePublicationVo extractMessagePublication(MessageVo message, SystemPublicationVo publication, String lang) { // Sanity check if (message == null || publication == null || publication.getDesc(lang) == null || message.getDesc(lang) == null) { return null; } boolean internal = publication.getMessagePublication() == MessagePublication.INTERNAL; String pubHtml = internal ? message.getDesc(lang).getInternalPublication() : message.getDesc(lang).getPublication(); if (StringUtils.isBlank(pubHtml)) { return null; } PublicationDescVo pubDesc = publication.getDesc(lang); Document doc = Jsoup.parseBodyFragment(pubHtml); String pubAttr = "[publication=" + publication.getPublicationId() + "]"; Element e = doc.select("a" + pubAttr + ",span" + pubAttr).first(); if (e != null) { MessagePublicationVo msgPub = new MessagePublicationVo(); msgPub.setPublication(publication); String link = e.attr("href"); if (StringUtils.isNotBlank(link) && pubDesc != null && !Objects.equals(link, pubDesc.getLink())) { msgPub.setLink(link); } String text = TextUtils.removeTrailingDot(e.html()); // Internal publications have brackets around them if (internal && text.startsWith("[") && text.endsWith("]")) { text = text.substring(1, text.length() - 1); } String format = pubDesc != null ? 
pubDesc.getMessagePublicationFormat() : null; if (StringUtils.isNotBlank(text) && StringUtils.isNotBlank(format) && format.contains("${parameters}")) { int index = format.indexOf("${parameters}"); String prefix = format.substring(0, index); String suffix = format.substring(index + "${parameters}".length()); if (text.startsWith(prefix) && text.endsWith(suffix)) { String params = text.substring(prefix.length(), text.length() - suffix.length()); msgPub.setParameters(params); } } return msgPub; } return null; }
From source file:org.norvelle.addressdiscoverer.parse.structured.StructuredPageEmailContactLink.java
/**
 * Looks for an email address first in the element's HTML (so that element
 * attributes are searched too) and, when none is found there, in the element's
 * plain text (in case the markup was scrambled to obfuscate the address).
 *
 * @param element the element to search
 * @throws DoesNotContainContactLinkException if neither the HTML nor the text
 *         yields an address
 * @throws MultipleContactLinksOfSameTypeFoundException
 */
public StructuredPageEmailContactLink(Element element)
        throws DoesNotContainContactLinkException, MultipleContactLinksOfSameTypeFoundException {
    super(element);

    final String markup = element.html();
    String found;
    try {
        found = this.findLinkInString(markup);
    } catch (DoesNotContainContactLinkException notInMarkup) {
        // Fall back to the rendered text; a failure here propagates to the caller.
        found = this.findLinkInString(element.text());
    }
    this.address = found;
}
From source file:org.norvelle.addressdiscoverer.parse.structured.StructuredPageWebContactLink.java
/** * Fetches the web page specified by the contact weblink and extracts * an email from it. The email gets stored in the address field for retrieval * by the Individual extractor. Note that we fetch the first such email found * and discard others.// w ww. j ava2 s.c o m * * @return * @throws org.norvelle.addressdiscoverer.exceptions.DoesNotContainContactLinkException */ public String fetchEmailFromWeblink() throws DoesNotContainContactLinkException { String body; if (this.address.startsWith("javascript:")) throw new DoesNotContainContactLinkException(); // Try to fetch the webpage linked to try { String addr = StructuredPageContactLinkLocator.resolveAddress(this.address); URL u = new URL(addr); u.toURI(); URLConnection con = u.openConnection(); InputStream in = con.getInputStream(); String encoding = con.getContentEncoding(); encoding = encoding == null ? "UTF-8" : encoding; String html = IOUtils.toString(in, encoding); Document soup = Jsoup.parse(html); Element bodyElement = soup.select("body").first(); body = bodyElement.html(); } catch (URISyntaxException | IOException ex) { throw new DoesNotContainContactLinkException(); } // Now, extract the email if we can. String matchFound = this.findEmail(body); if (matchFound.isEmpty()) { throw new DoesNotContainContactLinkException(); } return matchFound; }
From source file:org.openhab.tools.analysis.checkstyle.AboutHtmlCheck.java
private void checkLicenseParagraph(Document processedAboutHtmlFileDocument) { Document validAboutHtmlFileDocument = Jsoup.parse(validAboutHtmlFileContent); Elements validAboutHtmlFileParagraphTags = validAboutHtmlFileDocument.getElementsByTag(PARAGRAPH_TAG); // the paragraph with index 1 in the valid about.html file // is the license paragraph Element validAboutHtmlFileLicenseParagraph = validAboutHtmlFileParagraphTags.get(1); String validAboutHtmlFileLicenseParagraphContent = validAboutHtmlFileLicenseParagraph.html(); Elements processedFileParagraphTags = processedAboutHtmlFileDocument.getElementsByTag(PARAGRAPH_TAG); if (!isElementProvided(processedFileParagraphTags, validAboutHtmlFileLicenseParagraphContent)) { log(0, "Invalid or missing license paragraph in the about.html file. " + VALID_ABOUT_HTML_FILE_LINK_MSG + validAboutHtmlFileURL); }/*from ww w .j a v a 2s . co m*/ }
From source file:org.openhab.tools.analysis.checkstyle.AboutHtmlCheck.java
private boolean isElementProvided(Elements elements, String searchedElement) { for (Element element : elements) { String elementContent = element.html(); if (elementContent.replaceAll("\\s", "").equals(searchedElement.replaceAll("\\s", ""))) { return true; }//from ww w . jav a 2s .c om } return false; }
From source file:org.opens.rules.doc.utils.exportdomtocsv.ExportDomToCsv.java
/** * Before using it please set the FOLDER variable with the path where you * want to create your csv file./*from ww w . ja v a 2 s.co m*/ * * @param args * @throws IOException */ public static void main(String[] args) throws IOException { File ref = FileUtils.getFile(FOLDER); JsoupFunc jsf = new JsoupFunc(); Document doc = jsf.getDocument(); Elements thematiques = doc.select("div.thematique"); StringBuilder sb = new StringBuilder(); String testCode = ""; String testLabel = ""; String critere = ""; for (int i = 2; i < thematiques.size(); i++) { String themeIndex = String.valueOf(i - 1) + ""; String theme = (thematiques.get(i).child(0).text() + ""); Elements criteres = thematiques.get(i).select("h3"); for (int j = 1; j < criteres.size(); j++) { Element critereLevel = criteres.get(j); String critereH3String = critereLevel.toString(); String level = critereH3String.substring(critereH3String.indexOf("[") + 1, critereH3String.indexOf("]")) + ""; Elements tests = criteres.get(j).nextElementSibling().select("[id^=test-]"); try { critere = criteres.get(j).id().substring(5, 10) + ""; } catch (StringIndexOutOfBoundsException sioobe) { try { critere = criteres.get(j).id().substring(5, 9) + ""; } catch (StringIndexOutOfBoundsException sioobe2) { critere = criteres.get(j).id().substring(5, 8) + ""; } } String[] critereArray = criteres.get(j).text().split("] "); String critereLabel = critereArray[1].toString() + ""; for (Element el : tests) { Pattern digitPattern = Pattern.compile("\\d+\\.\\d+\\.\\d+\\s?\\:?\\s?"); Matcher matcher = digitPattern.matcher(el.text()); if (matcher.find()) { String testLabelReplace = el.html() .replace("index.php", "http://www.accessiweb.org/index.php").replace("\n", ""); testLabel = testLabelReplace.substring(matcher.end(), testLabelReplace.length()) + ""; } try { testCode = el.id().substring(5, 12) + ""; } catch (StringIndexOutOfBoundsException sioobe) { try { testCode = (el.id().substring(5, 11) + ""); } catch (StringIndexOutOfBoundsException 
sioobe3) { testCode = (el.id().substring(5, 10) + ""); } } sb.append(themeIndex + theme + critere + critereLabel + testCode + testLabel + level + "\n"); } } } FileUtils.writeStringToFile(ref, sb.toString()); }
From source file:org.sakaiproject.nakamura.files.migrator.PageMigrator.java
/**
 * Flushes pending HTML into {@code row}, attaches the row to {@code page} when
 * it has content and is not already part of the page, and returns a new empty
 * row for the caller to continue with.
 *
 * @param row               the row currently being filled
 * @param page              the page whose "rows" array may receive the row
 * @param columnsForNextRow column count for the returned row; values below 1
 *                          are treated as 1
 * @param htmlElement       element whose inner HTML is added to the row as an
 *                          "htmlblock" cell unless {@code isEmpty} reports it empty
 * @return the row produced by {@code generateEmptyRow}
 * @throws JSONException if the expected JSON structure is missing
 */
protected JSONObject addRowToPage(JSONObject row, JSONObject page, int columnsForNextRow, Element htmlElement)
        throws JSONException {
    boolean hasPendingHtml = !isEmpty(htmlElement);
    if (hasPendingHtml) {
        generateNewCell(null, "htmlblock", page, row, 0, generateHtmlBlock(htmlElement.html()));
    }

    // A row counts as filled as soon as one of its columns holds an element.
    boolean filled = false;
    int column = 0;
    while (!filled && column < row.getJSONArray("columns").length()) {
        filled = row.getJSONArray("columns").getJSONObject(column).getJSONArray("elements").length() > 0;
        column++;
    }

    // Identity comparison: is this very row object already attached to the page?
    boolean attached = false;
    int index = 0;
    while (!attached && index < page.getJSONArray("rows").length()) {
        attached = page.getJSONArray("rows").getJSONObject(index) == row;
        index++;
    }

    if (filled && !attached) {
        page.accumulate("rows", row);
    }

    return generateEmptyRow(Math.max(columnsForNextRow, 1));
}