List of usage examples for org.jsoup.select.Elements.first()
public Element first()
From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @param isMarket True if the link is from a marketplace category *//* www . jav a2 s.c o m*/ public void getCategoryContents(Document doc, String id, boolean isMarket) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; } catch (Exception e) { } // Make sure id contains only numbers if (!isNewTopicActivity) id = Utils.parseInts(id); // Grab each thread Elements threadListing = doc.select("table[id=threadslist] > tbody > tr"); for (Element thread : threadListing) { try { boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false, isPoll = false; String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "", lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = ""; Elements announcementContainer = thread.select("td[colspan=5]"); Elements threadTitleContainer = thread.select("a[id^=thread_title]"); // We could have two different types of threads. 
Announcement threads are // completely different than the other types of threads (sticky, locked, etc) // so we need to play some games here if (announcementContainer != null && !announcementContainer.isEmpty()) { Log.d(TAG, "Announcement Thread Found"); Elements annThread = announcementContainer.select("div > a"); Elements annUser = announcementContainer.select("div > span[class=smallfont]"); formattedTitle = "Announcement: " + annThread.first().text(); threadUser = annUser.last().text(); threadLink = annThread.attr("href"); isAnnounce = true; } else if (threadTitleContainer != null && !threadTitleContainer.isEmpty()) { Element threadLinkEl = thread.select("a[id^=thread_title]").first(); Element repliesText = thread.select("td[title^=Replies]").first(); Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first(); Element threadicon = thread.select("img[id^=thread_statusicon_]").first(); Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first(); Element threadDateFull = thread.select("td[title^=Replies:] > div").first(); try { isSticky = threadDiv.text().contains("Sticky:"); } catch (Exception e) { } try { isPoll = threadDiv.text().contains("Poll:"); } catch (Exception e) { } try { String icSt = threadicon.attr("src"); isLocked = (icSt.contains("lock") && icSt.endsWith(".gif")); } catch (Exception e) { } String preString = ""; try { preString = threadDiv.select("span > b").text(); } catch (Exception e) { } try { hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty(); } catch (Exception e) { } // Find the last page if it exists try { lastPage = threadDiv.select("span").last().select("a").last().attr("href"); } catch (Exception e) { } threadDate = threadDateFull.text(); int findAMPM = threadDate.indexOf("M") + 1; threadDate = threadDate.substring(0, findAMPM); String totalPostsInThreadTitle = threadicon.attr("alt"); if (totalPostsInThreadTitle != null && totalPostsInThreadTitle.length() > 0) totalPosts = 
totalPostsInThreadTitle.split(" ")[2]; // Remove page from the link String realLink = Utils.removePageFromLink(link); if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) { String txt = repliesText.getElementsByClass("alt2").attr("title"); String splitter[] = txt.split(" ", 4); postCount = splitter[1].substring(0, splitter[1].length() - 1); views = splitter[3]; try { if (this.isNewTopicActivity) forum = thread.select("td[class=alt1]").last().text(); } catch (Exception e) { } formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "", preString.length() == 0 ? "" : preString + " ", threadLinkEl.text()); } threadUser = threaduser.text(); lastUser = repliesText.select("a[href*=members]").text(); threadLink = threadLinkEl.attr("href"); } // Add our thread to our list as long as the thread // contains a title if (!formattedTitle.equals("")) { ThreadModel tv = new ThreadModel(); tv.setTitle(formattedTitle); tv.setStartUser(threadUser); tv.setLastUser(lastUser); tv.setLink(threadLink); tv.setLastLink(lastPage); tv.setPostCount(postCount); tv.setMyPosts(totalPosts); tv.setViewCount(views); tv.setLocked(isLocked); tv.setSticky(isSticky); tv.setAnnouncement(isAnnounce); tv.setPoll(isPoll); tv.setHasAttachment(hasAttachment); tv.setForum(forum); tv.setLastPostTime(threadDate); threadlist.add(tv); } else if (thread.text() .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) { Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size())); if (threadlist.size() > 0) { ThreadModel ltv = threadlist.get(threadlist.size() - 1); Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle())); } if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext())) threadlist.add(new ThreadModel(true)); else { Log.d(TAG, "User Chose To Hide Old Threads"); break; } } } catch (Exception e) { Log.e(TAG, "Error Parsing That Thread...", e); Log.d(TAG, "Thread may have 
moved"); } } }
From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageViewFragment.java
/** * Construct the view elements//from w w w . j a v a 2s. c o m */ private void constructView() { AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() { @Override protected void onPreExecute() { loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading), getString(R.string.pleaseWait), true); } @Override protected Void doInBackground(Void... params) { String link = getArguments().getString("link"); Document doc = VBForumFactory.getInstance().get(getActivity(), VBForumFactory.getRootAddress() + "/" + link); if (doc != null) { securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); pmid = HtmlFormUtils.getInputElementValueByName(doc, "pmid"); title = HtmlFormUtils.getInputElementValueByName(doc, "title"); Elements userPm = doc.select("table[id^=post]"); publishProgress(getString(R.string.asyncDialogLoadingPM)); // User Control Panel Elements userCp = userPm.select("td[class=alt2]"); Elements userDetail = userCp.select("div[class=smallfont]"); Elements userSubDetail = userDetail.last().select("div"); Elements userAvatar = userDetail.select("img[alt$=Avatar]"); Elements postMessage = doc.select("div[id=post_message_]"); PMPostModel pv = new PMPostModel(); pv.setUserName(userCp.select("div[id^=postmenu]").text()); pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn() ? 
UserProfile.getInstance().getUsername().equals(pv.getUserName()) : false); pv.setUserTitle(userDetail.first().text()); pv.setUserImageUrl(Utils.resolveUrl(userAvatar.attr("src"))); pv.setPostDate(userPm.select("td[class=thead]").first().text()); // userSubDetail // 0 - full container , full container // 1 - Trader Score , Trader Score // 2 - Join Date , Join Date // 3 - Post Count , Location // 4 - Blank , Post Count // 5 - , Blank || Social // Iterator<Element> itr = userSubDetail.listIterator(); while (itr.hasNext()) { String txt = itr.next().text(); if (txt.contains("Location:")) pv.setUserLocation(txt); else if (txt.contains("Posts:")) pv.setUserPostCount(txt); else if (txt.contains("Join Date:")) pv.setJoinDate(txt); } // User Post Content pv.setUserPost(formatUserPost(postMessage)); pmlist.add(pv); TextView comment = (TextView) getView().findViewById(R.id.pmitem_comment); Elements textarea = doc.select("textarea[id=vB_Editor_QR_textarea]"); if (textarea != null) { comment.setText(textarea.first().text()); } updateList(); } return null; } @Override protected void onProgressUpdate(String... progress) { if (loadingDialog != null) loadingDialog.setMessage(progress[0]); } @Override protected void onPostExecute(Void result) { try { loadingDialog.dismiss(); loadingDialog = null; } catch (Exception e) { Log.w(TAG, e.getMessage()); } } }; updaterTask.execute(); }
From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @return An arraylist of forum contents *///from w w w . j a v a 2 s. c o m public void getThreadContents(Document doc) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); if (pageNumbers.first() != null) { Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; Log.d(TAG, String.format("This Page: %s, Final Page: %s", thisPage, finalPage)); } else { Log.d(TAG, "Thread only contains one page"); } } catch (Exception e) { Log.e(TAG, "We had an error with pagination", e); } // Is user thread admin?? Elements threadTools = doc.select("div[id=threadtools_menu] > form > table"); if (threadTools.text().contains(MODERATION_TOOLS)) { Log.d(TAG, "<><> User has administrative rights here! <><>"); } else { //adminContent.setVisibility(View.GONE); lv.removeHeaderView(adminContent); } // Get the user's actual ID, there is a chance they never got it // before UserProfile.getInstance().setUserId(HtmlFormUtils.getInputElementValueByName(doc, "loggedinuser")); // Get Post Number and security token securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); Elements pNumber = doc.select("a[href^=http://www.rx8club.com/newreply.php?do=newreply&noquote=1&p=]"); String pNumberHref = pNumber.attr("href"); postNumber = pNumberHref.substring(pNumberHref.lastIndexOf("=") + 1); threadNumber = doc.select("input[name=searchthreadid]").attr("value"); Elements posts = doc.select("div[id=posts]").select("div[id^=edit]"); Log.v(TAG, String.format("Parsing through %d posts", posts.size())); for (Element post : posts) { try { Elements innerPost = post.select("table[id^=post]"); // User Control Panel Elements userCp = innerPost.select("td[class=alt2]"); Elements userDetail = 
userCp.select("div[class=smallfont]"); Elements userSubDetail = userDetail.last().select("div"); Elements userAvatar = userDetail.select("img[alt$=Avatar]"); // User Information PostModel pv = new PostModel(); pv.setUserName(userCp.select("div[id^=postmenu]").text()); pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn() ? UserProfile.getInstance().getUsername().equals(pv.getUserName()) : false); pv.setUserTitle(userDetail.first().text()); pv.setUserImageUrl(userAvatar.attr("src")); pv.setPostDate(innerPost.select("td[class=thead]").first().text()); pv.setPostId(Utils.parseInts(post.attr("id"))); pv.setRootThreadUrl(currentPageLink); // get Likes if any exist Elements eLikes = innerPost.select("div[class*=vbseo_liked] > a"); List<String> likes = new ArrayList<String>(); for (Element eLike : eLikes) likes.add(eLike.text()); pv.setLikes(likes); Iterator<Element> itr = userSubDetail.listIterator(); while (itr.hasNext()) { String txt = itr.next().text(); if (txt.contains("Location:")) pv.setUserLocation(txt); else if (txt.contains("Posts:")) pv.setUserPostCount(txt); else if (txt.contains("Join Date:")) pv.setJoinDate(txt); } // User Post Content pv.setUserPost(formatUserPost(innerPost)); // User signature try { Element userSig = innerPost.select("div[class=konafilter]").first(); pv.setUserSignature(userSig.html()); } catch (NullPointerException npe) { } Elements postAttachments = innerPost.select("a[id^=attachment]"); if (postAttachments != null && !postAttachments.isEmpty()) { ArrayList<String> attachments = new ArrayList<String>(); for (Element postAttachment : postAttachments) { attachments.add(postAttachment.attr("href")); } pv.setAttachments(attachments); } pv.setSecurityToken(securityToken); // Make sure we aren't adding a blank user post if (pv.getUserPost() != null) postlist.add(pv); } catch (Exception e) { Log.w(TAG, "Error Parsing Post...Probably Deleted"); } } }
From source file:com.rickendirk.rsgwijzigingen.ZoekService.java
private void addMessage(Wijzigingen wijzigingen, Document doc) { Elements messageSpan = doc.select("body > div > div > div > table > tbody > tr:nth-child(1)"); if (!messageSpan.isEmpty()) { String message = messageSpan.first().text(); wijzigingen.setMessage(message); }//from www.j av a 2s . c o m }
From source file:im.ene.lab.attiq.ui.activities.ProfileActivity.java
@SuppressWarnings("unused") public void onEventMainThread(DocumentEvent event) { if (event.document != null) { Elements stats = event.document.getElementsByClass("userActivityChart_stats"); Element statBlock;// ww w .ja va 2 s . co m if (!UIUtil.isEmpty(stats) && (statBlock = stats.first()) != null) { Elements statElements = statBlock.children(); Integer contribution = null; for (Element element : statElements) { String unit = element.getElementsByClass("userActivityChart_statUnit").text(); if ("Contribution".equals(unit.trim())) { try { contribution = Integer .valueOf(element.getElementsByClass("userActivityChart_statCount").text()); } catch (NumberFormatException er) { er.printStackTrace(); } break; } } if (contribution != null) { ((State) mState).contribution = contribution; EventBus.getDefault().post(new StateEvent<>(getClass().getSimpleName(), true, null, mState)); } } } }
From source file:org.aankor.animenforadio.api.WebsiteGate.java
private boolean updateNowPlaying(String nowPlaying) { Document doc = Jsoup.parse(nowPlaying); Elements spans = doc.select("div .float-container .row .span6"); Matcher matcher = mainNowPlayingPattern.matcher(spans.first().text()); if (!matcher.find()) { unsetCurrentSong();//from w ww . j av a2 s . c o m return false; } SongInfo newSongInfo = new SongInfo(matcher.group(1), matcher.group(2), matcher.group(3), matcher.group(4), matcher.group(5), matcher.group(6)); Elements e = doc.select("div img"); if (!e.isEmpty()) { String artUrl = e.attr("src"); newSongInfo.setArtUrl(artUrl); } else newSongInfo.unsetArtUrl(); newSongInfo.setSongId(Integer.valueOf(spans.get(1).select("a[data-songinfo]").attr("data-songinfo"))); int songPosTime = Integer.valueOf(spans.get(1).select("#np_timer").attr("rel")); long currentTime = (new Date()).getTime(); currentSongEndTime = currentTime + songPosTime * 1000l; String songPosTimeStr = spans.get(1).select("#np_timer").text(); newSongInfo.setDuration(Integer.valueOf(spans.get(1).select("#np_time").attr("rel"))); newSongInfo.setDurationStr(spans.get(1).select("#np_time").text()); matcher = raitingNowPlayingPattern.matcher(spans.get(1).html()); if (matcher.find()) newSongInfo.setRating(matcher.group(1)); else newSongInfo.unsetRating(); newSongInfo.setFavourites(Integer.valueOf(spans.get(1) .select(".favourite-container span[data-favourite-count]").attr("data-favourite-count"))); matcher = nowPlayingBarPattern.matcher(doc.select("#nowPlayingBar").attr("style")); double nowPlayingPos = 0.0; if (matcher.find()) nowPlayingPos = Double.valueOf(matcher.group(1)); if ((currentSong != null) && newSongInfo.getArtUrl().equals(currentSong.getArtUrl())) newSongInfo.setArtBmp(currentSong.getArtBmp(), currentSong.getMiniArtBmp()); currentSong = newSongInfo; currentSongPos = new SongPos(songPosTime, songPosTimeStr, nowPlayingPos); return true; }
From source file:org.aliuge.crawler.extractor.selector.AbstractElementCssSelector.java
/** * ????/*from w w w .ja v a 2s . c om*/ * @param elements * @return */ protected String getExtractText(Elements elements) { if (elements.size() == 0) return null; String temp = ""; if (attr.equalsIgnoreCase("tostring")) { return temp = elements.toString(); } else { if (index == -1 && StringUtils.isNotBlank(this.regex)) { for (Element e : elements) { Element element = e; if (element.select(this.regex).size() > 0) { return temp = e.text(); } } return temp; } else { if (index > -1 && index < elements.size()) { return elements.get(index).text(); } } return elements.first().text(); } /*if(attr.equals("tostring")){ if(index==0 || index>elements.size()) temp = elements.first().toString(); else temp = elements.get(index).toString(); }else{ if(index==0 || index>elements.size()) temp = elements.first().text(); else temp = elements.get(index).text(); } if(null!=pattern){ Matcher m = pattern.matcher(temp); if(m.find()){ temp = m.group(1); } }*/ //return temp; }
From source file:org.aliuge.crawler.extractor.selector.AbstractElementCssSelector.java
/** * ??????//w w w.ja v a 2 s. c om * @param elements * @param attr * @return */ protected String getExtractAttr(Elements elements, String attr) { String temp = ""; if (attr.equalsIgnoreCase("tostring")) { return temp = elements.attr(attr).toString(); } else { if (index == -1 && StringUtils.isNotBlank(this.regex)) { for (Element e : elements) { Element element = e; if (element.select(this.regex).size() > 0) { return temp = e.attr(attr); } } return temp; } else { if (index > -1 && index < elements.size()) { return elements.get(index).attr(attr); } } return elements.first().attr(attr); } /*if(null!=pattern){ Matcher m = pattern.matcher(temp); if(m.find()){ temp = m.group(1); } }*/ //return temp; }
From source file:org.asqatasun.crawler.CrawlerImpl.java
/** * Waiting for a better implementation, we parse here the html content * to detect the presence of the rel=canonical property. * @param content//from w w w.j a v a 2 s . c o m * @return whether the current page defines a rel canonical Url and whether * this url is different from the current url. */ public final boolean isRelCanonicalPage(Content content) { // @TODO make this implementation cleaner if (!excludeRelCanonical) { return false; } if (!(content instanceof SSP)) { return false; } if (StringUtils.isBlank(((SSP) content).getSource())) { return false; } Elements relCanonical = Jsoup.parse(((SSP) content).getSource()).select(REL_CANONICAL_CSS_LIKE_QUERY); if (relCanonical.isEmpty() || relCanonical.size() > 1) { return false; } // At this step, we are sure that the rel canonical is defined and // is unique String href = relCanonical.first().attr("href"); if (href.equals(".")) { return false; } if (href.endsWith("/")) { href = href.substring(0, href.length() - 1); } if (href.startsWith("/")) { Elements base = Jsoup.parse(((SSP) content).getSource()).select(BASE_CSS_LIKE_QUERY); if (!base.isEmpty()) { if (StringUtils.endsWith(base.first().attr("href"), "/")) { href = StringUtils.join(base.first().attr("href"), href.substring(1)); } else { href = StringUtils.join(base.first().attr("href") + href); } LOGGER.debug("(BASE CASE) The concat href " + href); } else { URI contractUri; try { contractUri = new URI(content.getURI()); href = StringUtils.join(contractUri.getScheme(), "://", contractUri.getHost(), href); LOGGER.debug("(NORMAL CASE) The concat href " + href); } catch (URISyntaxException ex) { LOGGER.error("Error when creating uri object with url " + content.getURI()); } } } if (href.contains("//")) { href = href.substring(href.indexOf("//") + 2); } String currentUrl = content.getURI(); if (currentUrl.endsWith("/")) { currentUrl = currentUrl.substring(0, currentUrl.length() - 1); } if (currentUrl.contains("//")) { currentUrl = 
currentUrl.substring(currentUrl.indexOf("//") + 2); } if (currentUrl.equals(href)) { LOGGER.info("rel canonical present but points to itself " + content.getURI()); return false; } return true; }
From source file:org.asqatasun.rules.textbuilder.PathElementBuilderTest.java
/** * Test of buildTextFromElement method, of class PathElementBuilder. */// w w w .j a v a 2 s .c om public void testBuildTextFromElement() throws IOException { LOGGER.info("buildTextFromElement"); Document document = Jsoup .parse(FileUtils.readFileToString(new File("src/test/resources/pathBuilder/test1.html"))); Element el = document.select("footer").first(); PathElementBuilder instance = new PathElementBuilder(true); String result = instance.buildTextFromElement(el); LOGGER.debug("result = " + result); Elements elementsFromBuiltPath = document.select(result); assertEquals(1, elementsFromBuiltPath.size()); assertEquals(el, elementsFromBuiltPath.first()); }