List of usage examples for org.jsoup.nodes Element html
public String html()
From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @return An arraylist of forum contents *//*from w ww . ja va 2 s. c o m*/ public void getThreadContents(Document doc) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); if (pageNumbers.first() != null) { Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; Log.d(TAG, String.format("This Page: %s, Final Page: %s", thisPage, finalPage)); } else { Log.d(TAG, "Thread only contains one page"); } } catch (Exception e) { Log.e(TAG, "We had an error with pagination", e); } // Is user thread admin?? Elements threadTools = doc.select("div[id=threadtools_menu] > form > table"); if (threadTools.text().contains(MODERATION_TOOLS)) { Log.d(TAG, "<><> User has administrative rights here! <><>"); } else { //adminContent.setVisibility(View.GONE); lv.removeHeaderView(adminContent); } // Get the user's actual ID, there is a chance they never got it // before UserProfile.getInstance().setUserId(HtmlFormUtils.getInputElementValueByName(doc, "loggedinuser")); // Get Post Number and security token securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); Elements pNumber = doc.select("a[href^=http://www.rx8club.com/newreply.php?do=newreply&noquote=1&p=]"); String pNumberHref = pNumber.attr("href"); postNumber = pNumberHref.substring(pNumberHref.lastIndexOf("=") + 1); threadNumber = doc.select("input[name=searchthreadid]").attr("value"); Elements posts = doc.select("div[id=posts]").select("div[id^=edit]"); Log.v(TAG, String.format("Parsing through %d posts", posts.size())); for (Element post : posts) { try { Elements innerPost = post.select("table[id^=post]"); // User Control Panel Elements userCp = innerPost.select("td[class=alt2]"); Elements userDetail = 
userCp.select("div[class=smallfont]"); Elements userSubDetail = userDetail.last().select("div"); Elements userAvatar = userDetail.select("img[alt$=Avatar]"); // User Information PostModel pv = new PostModel(); pv.setUserName(userCp.select("div[id^=postmenu]").text()); pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn() ? UserProfile.getInstance().getUsername().equals(pv.getUserName()) : false); pv.setUserTitle(userDetail.first().text()); pv.setUserImageUrl(userAvatar.attr("src")); pv.setPostDate(innerPost.select("td[class=thead]").first().text()); pv.setPostId(Utils.parseInts(post.attr("id"))); pv.setRootThreadUrl(currentPageLink); // get Likes if any exist Elements eLikes = innerPost.select("div[class*=vbseo_liked] > a"); List<String> likes = new ArrayList<String>(); for (Element eLike : eLikes) likes.add(eLike.text()); pv.setLikes(likes); Iterator<Element> itr = userSubDetail.listIterator(); while (itr.hasNext()) { String txt = itr.next().text(); if (txt.contains("Location:")) pv.setUserLocation(txt); else if (txt.contains("Posts:")) pv.setUserPostCount(txt); else if (txt.contains("Join Date:")) pv.setJoinDate(txt); } // User Post Content pv.setUserPost(formatUserPost(innerPost)); // User signature try { Element userSig = innerPost.select("div[class=konafilter]").first(); pv.setUserSignature(userSig.html()); } catch (NullPointerException npe) { } Elements postAttachments = innerPost.select("a[id^=attachment]"); if (postAttachments != null && !postAttachments.isEmpty()) { ArrayList<String> attachments = new ArrayList<String>(); for (Element postAttachment : postAttachments) { attachments.add(postAttachment.attr("href")); } pv.setAttachments(attachments); } pv.setSecurityToken(securityToken); // Make sure we aren't adding a blank user post if (pv.getUserPost() != null) postlist.add(pv); } catch (Exception e) { Log.w(TAG, "Error Parsing Post...Probably Deleted"); } } }
From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java
/** * Format the user post by removing the vb style quotes and the * duplicate youtube links// w w w .ja v a 2s . com * @param innerPost The element that contains the inner post * @return The formatted string */ private String formatUserPost(Elements innerPost) { try { Element ipost = innerPost.select("td[class=alt1]").select("div[id^=post_message]").first(); // Only if there is a post to key off of if (ipost != null) { // Remove the duplicate youtube links (this is caused by a plugin on // the forum that embeds youtube videos automatically) for (Element embedded : ipost.select("div[id^=ame_doshow_post_]")) embedded.remove(); // Remove the vbulletin quotes return Utils.reformatQuotes(ipost.html()); } else { return null; } } catch (Exception e) { Log.e(TAG, "Error Parsing Post", e); return null; } }
From source file:com.semfapp.adamdilger.semf.protectPlanActivity.java
public void createPdf() { Document documentTemplate = null; Element body = null; try {/*from ww w . ja va 2s .c o m*/ documentTemplate = Pdf.getTemplate(getApplicationContext(), null); body = Jsoup.parse(getAssets().open("protectPlan.html"), "utf-8", "http://www.example.com"); Elements lists = body.select(".list_box"); //Lists html Elements ArrayList<String[]> arrayList = data.getArray(); //editText string arrays //for each Element in lists, add each bullet from arrayList.string[] as a <p> for (int x = 0; x < lists.size(); x++) { String f = ""; for (String bullet : arrayList.get(x)) { f += "<p>" + bullet + "</p>"; } lists.get(x).html(f); } } catch (Exception e) { System.out.println("ERROR: " + e.toString()); } documentTemplate.getElementById("main").html(body.html()); String filePath = MainActivity.pdf.createFilePath(this, "Protect Plan"); MainActivity.pdf.createPdfToFile(this, documentTemplate.html(), filePath, null); pdfAttatchment = new File(filePath); }
From source file:net.meiolania.apps.habrahabr.fragments.companies.loader.CompaniesShowLoader.java
@Override public CompanyFullData loadInBackground() { CompanyFullData company = new CompanyFullData(); try {/*from ww w. ja v a2 s . co m*/ Log.i(TAG, "Loading a page: " + url); Document document = Jsoup.connect(url).get(); Elements datas = document.select("div.company_profile > dl"); int i = 0; for (Element data : datas) { switch (i) { case INFO_DATE: company.setDate(data.getElementsByTag("dd").first().text()); break; case INFO_SITE: company.setCompanyUrl(data.getElementsByTag("dd").first().text()); break; case INFO_INDUSTRIES: company.setIndustries(data.getElementsByTag("dd").first().text()); break; case INFO_LOCATION: company.setLocation(data.getElementsByTag("dd").first().text()); break; case INFO_QUANTITY: company.setQuantity(data.getElementsByTag("dd").first().text()); break; case INFO_SUMMARY: company.setSummary(data.select("dd.summary").first().html()); break; case INFO_MANAGEMENT: // TODO: think of a new algorithm Elements managers = data.getElementsByTag("dd"); StringBuilder managerContent = new StringBuilder(); for (Element manager : managers) managerContent.append(manager.html()); company.setManagement(managerContent.toString()); break; case INFO_DEVELOPMENT_STAGES: Elements stages = data.getElementsByTag("dd"); StringBuilder stagesContent = new StringBuilder(); for (Element stage : stages) stagesContent.append(stage.html()); company.setDevelopmentStages(stagesContent.toString()); break; } i++; } } catch (IOException e) { } return company; }
From source file:net.meiolania.apps.habrahabr.fragments.posts.loader.PostShowLoader.java
/**
 * Downloads the post page at {@code url} and extracts its title, hubs, content,
 * publication date and author into a {@link PostsFullData}.
 *
 * <p>When the page has no {@code span.post_title}, only the content is set, to
 * the {@code R.string.error_404} text. Any other element that is missing is
 * skipped, leaving the matching field at its default, instead of throwing a
 * {@link NullPointerException} on the loader thread.
 *
 * @return the populated post data; an otherwise empty object when the download
 *         fails
 */
@Override
public PostsFullData loadInBackground() {
    PostsFullData data = new PostsFullData();
    try {
        Document document = Jsoup.connect(url).get();

        Element title = document.select("span.post_title").first();
        if (title == null) {
            data.setContent(context.getString(R.string.error_404));
            return data;
        }

        Element hubs = document.select("div.hubs").first();
        Element content = document.select("div.content").first();
        Element date = document.select("div.published").first();
        Element author = document.select("div.author > a").first();

        data.setUrl(url);
        data.setTitle(title.text());
        if (hubs != null) {
            data.setHubs(hubs.text());
        }
        if (content != null) {
            data.setContent(content.html());
        }
        if (date != null) {
            data.setDate(date.text());
        }
        if (author != null) {
            data.setAuthor(author.text());
        }
    } catch (IOException ignored) {
        // Best effort: on a network failure the empty data object is returned as-is.
    }
    return data;
}
From source file:net.meiolania.apps.habrahabr.fragments.qa.loader.QaShowLoader.java
@Override public QaFullData loadInBackground() { QaFullData data = new QaFullData(); try {//from w w w . j a va2 s.c om Log.i(TAG, "Loading a page: " + url); Document document = Jsoup.connect(url).get(); Element title = document.select("span.post_title").first(); Element hubs = document.select("div.hubs").first(); Element content = document.select("div.content").first(); Element tags = document.select("ul.tags").first(); Element date = document.select("div.published").first(); Element author = document.select("div.author > a").first(); Element answers = document.select("span#comments_count").first(); data.setTitle(title.text()); data.setHubs(hubs.text()); data.setContent(content.html()); data.setTags(tags.text()); data.setDate(date.text()); data.setAuthor(author.text()); data.setAnswers(answers.text()); } catch (IOException e) { } return data; }
From source file:org.apache.nifi.GetHTMLElement.java
/** * Extracts the HTML value based on the configuration values. * * @return value from the parsed HTML element */// w w w . ja v a2 s. c om private String extractElementValue(String prependValue, final String outputType, String appendValue, final Element ele, final String attrKey) { if (StringUtils.isEmpty(prependValue)) { prependValue = ""; } if (StringUtils.isEmpty(appendValue)) { appendValue = ""; } switch (outputType) { case ELEMENT_HTML: return prependValue + ele.html() + appendValue; case ELEMENT_TEXT: return prependValue + ele.text() + appendValue; case ELEMENT_DATA: return prependValue + ele.data() + appendValue; case ELEMENT_ATTRIBUTE: return prependValue + ele.attr(attrKey) + appendValue; default: return prependValue + ele.html() + appendValue; } }
From source file:org.apache.nifi.TestModifyHTMLElement.java
@Test public void testModifyHTML() throws Exception { final String MOD_VALUE = "Newly modified HTML to replace " + GDR_WEATHER_TEXT; testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID); testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML); testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE); testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); testRunner.run();/*from w w w .j a va 2s . c om*/ testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1); testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0); testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1); testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0); List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS); assertTrue(ffs.size() == 1); String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. Document doc = Jsoup.parse(data); Elements eles = doc.select("#" + GDR_ID); Element ele = eles.get(0); assertTrue(StringUtils.equals(MOD_VALUE, ele.html())); }
From source file:org.apache.nifi.TestModifyHTMLElement.java
@Test public void testModifyValueContainsHTMLCharacters() throws Exception { final String MOD_VALUE = "Text that contains > and < characters"; testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID); testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML); testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE); testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); testRunner.run();//from w w w . j a v a2 s. com testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1); testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0); testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1); testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0); List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS); assertTrue(ffs.size() == 1); String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. Document doc = Jsoup.parse(data); Elements eles = doc.select("#" + GDR_ID); Element ele = eles.get(0); assertTrue(StringUtils.equals(MOD_VALUE, ele.text())); assertTrue(StringUtils.equals(MOD_VALUE.replace(">", ">").replace("<", "<"), ele.html())); }
From source file:org.apache.nifi.TestPutHTMLElement.java
@Test public void testAddNewElementToRoot() throws Exception { final String MOD_VALUE = "<p>modified value</p>"; testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "body"); testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT); testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE); testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); testRunner.run();//from ww w.j av a2 s. c o m testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1); testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0); testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1); testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0); List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS); assertTrue(ffs.size() == 1); String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. Document doc = Jsoup.parse(data); Elements eles = doc.select("body > p"); Element ele = eles.get(0); assertTrue(StringUtils.equals(MOD_VALUE.replace("<p>", "").replace("</p>", ""), ele.html())); }