List of usage examples for the org.jsoup.nodes.Element method select
public Elements select(String cssQuery)
From source file:net.meiolania.apps.habrahabr.fragments.hubs.loader.HubsLoader.java
@Override public ArrayList<HubsData> loadInBackground() { ArrayList<HubsData> data = new ArrayList<HubsData>(); try {//from w w w .jav a2 s.c o m String readyUrl = url.replace("%page%", String.valueOf(page)); Log.i(TAG, "Loading a page: " + readyUrl); Document document = Jsoup.connect(readyUrl).get(); Elements hubs = document.select("div.hub"); for (Element hub : hubs) { HubsData hubsData = new HubsData(); Element index = hub.select("div.habraindex").first(); Element title = hub.select("div.title > a").first(); Element stat = hub.select("div.stat").first(); hubsData.setTitle(title.text()); hubsData.setUrl(title.attr("abs:href")); hubsData.setStat(stat.text()); hubsData.setIndex(index.text()); data.add(hubsData); } } catch (IOException e) { } return data; }
From source file:net.meiolania.apps.habrahabr.fragments.posts.loader.PostCommentsLoader.java
/**
 * Recursively flattens a tree of comment elements into {@code commentsDatas},
 * tagging each entry with its nesting depth.
 *
 * <p>Sub-elements that are missing from a comment are stored as empty strings
 * so that a single incomplete comment does not abort the whole traversal.
 *
 * @param comments comment elements at the current depth
 * @param level    nesting depth assigned to {@code comments}; replies get {@code level + 1}
 */
private void parseComments(Elements comments, int level) {
    for (Element comment : comments) {
        /*
         * TODO: Really awful. Need to rewrite this.
         *
         * The reply selector further down is evaluated against the whole subtree of
         * the comment, so it can also return replies nested deeper than one level.
         * Tracking the ids already handled keeps those from being added twice.
         */
        String commentId = comment.attr("id");
        if (containedComments.contains(commentId)) {
            continue;
        }
        containedComments.add(commentId);

        // first() returns null when the selector matches nothing.
        Element name = comment.select("a.username").first();
        Element message = comment.select("div.message").first();
        Element linkToComment = comment.select("a.link_to_comment").first();
        Element score = comment.select("span.score").first();

        CommentsData commentsData = new CommentsData();
        commentsData.setScore(score != null ? score.text() : "");
        commentsData.setUrl(linkToComment != null ? linkToComment.attr("abs:href") : "");
        commentsData.setAuthorUrl(name != null ? name.attr("abs:href") : "");
        commentsData.setAuthor(name != null ? name.text() : "");
        commentsData.setComment(message != null ? message.text() : "");
        commentsData.setLevel(level);

        commentsDatas.add(commentsData);

        Elements replyComments = comment.select("div.reply_comments > div.comment_item");
        parseComments(replyComments, level + 1);
    }
}
From source file:net.meiolania.apps.habrahabr.fragments.posts.loader.PostsLoader.java
/**
 * Fetches the posts listing for the current {@code page} and scrapes every
 * {@code div.post} entry into a {@link PostsData}.
 *
 * <p>Missing sub-elements fall back to an empty string, except the comment
 * counter, which falls back to {@code "0"}.
 *
 * @return the parsed posts; empty when the page could not be fetched
 */
@Override
public ArrayList<PostsData> loadInBackground() {
    ArrayList<PostsData> data = new ArrayList<PostsData>();
    try {
        String readyUrl = url.replace("%page%", String.valueOf(page));

        Log.i(TAG, "Loading a page: " + readyUrl);

        Document document = Jsoup.connect(readyUrl).get();
        Elements posts = document.select("div.post");
        for (Element post : posts) {
            // first() returns null when the selector matches nothing.
            Element postTitle = post.select("a.post_title").first();
            Element hubs = post.select("div.hubs").first();
            Element date = post.select("div.published").first();
            Element author = post.select("div.author > a").first();
            Element comments = post.select("div.comments > span.all").first();
            Element score = post.select("span.score").first();

            PostsData postsData = new PostsData();
            postsData.setTitle(postTitle != null ? postTitle.text() : "");
            postsData.setUrl(postTitle != null ? postTitle.attr("abs:href") : "");
            postsData.setHubs(hubs != null ? hubs.text() : "");
            postsData.setDate(date != null ? date.text() : "");
            postsData.setAuthor(author != null ? author.text() : "");
            postsData.setComments(comments != null ? comments.text() : "0");
            postsData.setScore(score != null ? score.text() : "");

            data.add(postsData);
        }
    } catch (IOException e) {
        // Best effort: callers still get an (empty) list, but the failure is no longer silent.
        Log.e(TAG, "Failed to load posts page", e);
    }
    return data;
}
From source file:net.meiolania.apps.habrahabr.fragments.qa.loader.QaCommentsLoader.java
@Override public ArrayList<CommentsData> loadInBackground() { ArrayList<CommentsData> data = new ArrayList<CommentsData>(); try {//from w w w .ja v a 2 s .co m Document document = Jsoup.connect(url).get(); Elements answers = document.select("div.answer"); for (Element answer : answers) { CommentsData commentsData = new CommentsData(); Element name = answer.select("a.username").first(); Element message = answer.select("div.message").first(); Element linkToComment = answer.select("a.link_to_comment").first(); Element score = answer.select("span.score").first(); commentsData.setUrl(linkToComment.attr("abs:href")); commentsData.setAuthor(name.text()); commentsData.setAuthorUrl(name.attr("abs:href")); commentsData.setComment(message.text()); commentsData.setLevel(0); commentsData.setScore(score.text()); data.add(commentsData); Elements comments = answer.select("div.comment_item"); for (Element comment : comments) { commentsData = new CommentsData(); name = comment.select("span.info > a").first(); message = comment.select("span.text").first(); commentsData.setUrl(linkToComment.attr("abs:href")); commentsData.setAuthorUrl(name.attr("abs:href")); commentsData.setAuthor(name.text()); commentsData.setComment(message.text()); commentsData.setLevel(1); commentsData.setScore(""); data.add(commentsData); } } } catch (IOException e) { } return data; }
From source file:net.meiolania.apps.habrahabr.fragments.qa.loader.QaLoader.java
@Override public ArrayList<QaData> loadInBackground() { ArrayList<QaData> data = new ArrayList<QaData>(); try {// ww w .j a v a 2s . c om String readyUrl = url.replace("%page%", String.valueOf(page)); Log.i(TAG, "Loading a page: " + readyUrl); Document document = Jsoup.connect(readyUrl).get(); Elements qaList = document.select("div.post"); for (Element qa : qaList) { QaData qaData = new QaData(); Element title = qa.select("a.post_title").first(); Element hubs = qa.select("div.hubs").first(); Element answers = qa.select("div.informative").first(); Element date = qa.select("div.published").first(); Element author = qa.select("div.author > a").first(); Element score = qa.select("span.score").first(); qaData.setTitle(title.text()); qaData.setUrl(title.attr("abs:href")); qaData.setHubs(hubs.text()); qaData.setAnswers(answers.text()); qaData.setDate(date.text()); qaData.setAuthor(author.text()); qaData.setScore(score.text()); data.add(qaData); } } catch (IOException e) { } return data; }
From source file:net.meiolania.apps.habrahabr.fragments.users.loader.UsersLoader.java
/**
 * Fetches the users page at {@code url} and scrapes every {@code div.user}
 * entry into a {@link UsersData}.
 *
 * <p>Missing sub-elements are stored as empty strings instead of aborting the load.
 *
 * @return the parsed users; empty when the page could not be fetched
 */
@Override
public ArrayList<UsersData> loadInBackground() {
    ArrayList<UsersData> data = new ArrayList<UsersData>();
    try {
        Log.i(TAG, "Loading a page: " + url);

        Document document = Jsoup.connect(url).get();
        Elements users = document.select("div.user");
        for (Element user : users) {
            // first() returns null when the selector matches nothing.
            Element rating = user.select("div.rating").first();
            Element karma = user.select("div.karma").first();
            Element avatar = user.select("div.avatar > a > img").first();
            Element name = user.select("div.userlogin > div.username > a").first();
            Element lifetime = user.select("div.info > div.lifetime").first();

            UsersData usersData = new UsersData();
            usersData.setName(name != null ? name.text() : "");
            usersData.setUrl(name != null ? name.attr("abs:href") : "");
            usersData.setRating(rating != null ? rating.text() : "");
            usersData.setKarma(karma != null ? karma.text() : "");
            usersData.setAvatar(avatar != null ? avatar.attr("src") : "");
            usersData.setLifetime(lifetime != null ? lifetime.text() : "");

            data.add(usersData);
        }
    } catch (IOException e) {
        // Best effort: callers still get an (empty) list, but the failure is no longer silent.
        Log.e(TAG, "Failed to load users page", e);
    }
    return data;
}
From source file:net.thetabx.gcd.activity.ChatActivity.java
/**
 * Rebuilds {@code messages} from an HTML chat response and shows the message
 * count in {@code header}.
 *
 * <p>Each element matched by {@code .mChatHover>div} becomes one
 * {@link ChatMessage}, built from its first {@code a}, its first {@code span}
 * and the second entry of its {@code div} selection.
 *
 * @param response raw HTML of the chat response
 */
private void parse(String response) {
    Elements chatBlocks = Jsoup.parse(response).select(".mChatHover>div");

    messages = new ArrayList<ChatMessage>();
    for (int i = 0; i < chatBlocks.size(); i++) {
        Element chatBlock = chatBlocks.get(i);

        Element firstLink = chatBlock.select("a").get(0);
        Element firstSpan = chatBlock.select("span").get(0);
        // NOTE(review): index 1, not 0 - presumably because the selection also
        // contains the block itself (it is a div); confirm against the markup.
        Element secondDiv = chatBlock.select("div").get(1);

        messages.add(new ChatMessage(firstLink, firstSpan, secondDiv));
    }

    header.setText(String.format("Got %d messages", messages.size()));
}
From source file:net.trustie.model.OpenHubProject_Model.java
private void handleQuickRef(Element quickRef) { Elements itemNames = quickRef.select("dt"); Elements itemValues = quickRef.select("dd"); Element e = null;//ww w .jav a 2 s .c om Element eValue = null; for (int i = 0; i < itemNames.size(); i++) { e = itemNames.get(i); eValue = itemValues.get(i); String refName = e.text(); switch (refName) { case "Organization:": { this.organization = eValue.text(); break; } case "Project Links:": { Elements links = eValue.select("a"); String[] tmp = new String[links.size()]; Element ele = null; for (int j = 0; j < links.size(); j++) { ele = links.get(j); tmp[j] = ele.text() + Seperator.SOURCE_SEPERATOR + ele.attr("href"); } this.projectLinks = StringUtils.join(tmp, Seperator.OSSEAN_SEPERATOR); break; } case "Code Locations:": { Elements locs = eValue.select("a"); if (locs.size() == 0) { this.codeLocation = eValue.text(); } else { Element link = locs.get(0); this.codeLocation = link.text() + Seperator.SOURCE_SEPERATOR + link.attr("href"); } break; } case "Licenses:": { Elements links = eValue.select("a"); List<String> listLicenses = new ArrayList<String>(); // String[] tmp = new String[links.size()]; for (int j = 0; j < links.size(); j++) { listLicenses.add(links.get(j).text()); // tmp[j] = links.get(j).text(); } this.licenses = StringHandler.combineTags(listLicenses); break; } case "Similar Projects:": { // System.out.println(eValue); Elements projects = eValue.select("td[width=49%]"); // System.out.println(projects.size()); String[] tmp = new String[projects.size()]; Element ele = null; for (int j = 0; j < projects.size(); j++) { ele = projects.get(j); Element project = ele.select("a").get(0); tmp[j] = project.text() + Seperator.SOURCE_SEPERATOR + project.attr("href"); } this.similarProjects = StringUtils.join(tmp, Seperator.OSSEAN_SEPERATOR); break; } case "Managers:": { if ("Become the first manager for BugSystem".equals(eValue.text())) { break; } Elements users = eValue.select("a"); String[] tmp = new String[users.size()]; 
Element ele = null; for (int j = 0; j < users.size(); j++) { ele = users.get(j); tmp[j] = ele.text() + Seperator.SOURCE_SEPERATOR + ele.attr("href"); } this.managers = StringUtils.join(tmp, Seperator.OSSEAN_SEPERATOR); break; } default: { break; } } } }
From source file:net.trustie.model.SFProject_Model.java
/**
 * Completes this model after a page has been crawled: records the URL, its
 * MD5, the collection time and a page hash, then extracts page-type specific
 * fields and normalises the date fields.
 *
 * <p>Pages whose {@code body} id is {@code pg_project} are dispatched on the
 * body class ("bluesteel user" / "enterprise user"); any other page has name,
 * description and features read directly from the body. Nothing further is
 * done when the document has no {@code body} element.
 *
 * @param page the crawled page
 */
public void afterProcess(Page page) {
    this.url = page.getPageUrl();
    this.urlMd5 = DigestUtils.md5Hex(page.getPageUrl());
    this.collectTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    // NOTE(review): this hash uses the field values as they are on entry, i.e.
    // before the extract methods below may overwrite them - confirm this is intended.
    this.pageMd5 = DigestUtils.md5Hex(urlMd5 + lastUpdate + feature + downloadCount + stars);

    Document document = page.getHtml().getDocument();
    Elements bodies = document.select("body");
    if (bodies.size() == 0) {
        return;
    }

    Element body = bodies.get(0);
    if (!"pg_project".equals(body.attr("id"))) {
        // Not a "pg_project" page: read the fields straight from the body.
        this.name = body.select("div#proj_header div.proj-title h2").text();
        this.desc = body.select("div#top_left div#home_intro div#proj-overview p").text();
        this.feature = body.select("div#top_left div#home_intro div#proj-overview ul").text();
    } else {
        switch (body.attr("class")) {
        case "bluesteel user":
            extractPageBluesteelUser(document);
            break;
        case "enterprise user":
            extractPageEnterpriseUser(document);
            break;
        default:
            // other page flavours: nothing to extract
            break;
        }

        // Values containing "ago" are passed through getTime first.
        if (lastUpdate.contains("ago")) {
            this.lastUpdate = getTime(lastUpdate);
        }
        if (registeredTime.contains("ago")) {
            this.registeredTime = getTime(registeredTime);
        }
        // Empty dates are replaced by a zero timestamp.
        if (lastUpdate.equals("")) {
            this.lastUpdate = "0000-00-00 00:00:00";
        }
        if (registeredTime.equals("")) {
            this.registeredTime = "0000-00-00 00:00:00";
        }
    }

    this.lastUpdate = DateHandler.formatAllTypeDate(lastUpdate, page.getTime());
    this.registeredTime = DateHandler.formatAllTypeDate(registeredTime, page.getTime());
}
From source file:net.trustie.model.SFProject_Model.java
private void extractPageEnterpriseUser(Document doc) { // name//from w w w .ja v a2 s . co m Elements nameElements = doc.select("div#project-header div.content-group h1.project-name a"); name = nameElements.text(); // maintainers // stars Elements starsElements = doc .select("div#project-header div.content-group div.project-rating span:not(.rating-count)"); for (int i = 0; i < starsElements.size(); i++) { String attr = starsElements.get(i).attr("class"); if (attr.equals("rating star")) { stars += 1; } else if (attr.equals("rating star_half")) { stars += 0.5; } else { stars += 0; } } // download count Elements downloadElements = doc .select("div#project-header div.content-group div.group a.download-stats span.data"); if (downloadElements.size() > 0) { String strDownloadCount = downloadElements.get(0).text(); strDownloadCount = strDownloadCount.replaceAll("[^\\d]", ""); this.downloadCount = strDownloadCount; } // last update Elements lastUpdateElements = doc .select("div#project-header div.content-group div.project-rating time.dateUpdated"); if (lastUpdateElements.size() > 0) { lastUpdate = lastUpdateElements.get(0).attr("datetime"); } // platform Elements downloadOSElements = doc.select("div#project-header div.content-group div.download-os"); platform = downloadOSElements.text(); // desc Elements descElements = doc .select("section#overview div.content-group section.primary-content p#project-description"); desc = descElements.text(); // categories // license // feature Elements featureElements = doc.select( "section#overview div.content-group section.primary-content section#project-features div[class=content editable]"); feature = featureElements.text(); // language // intended audience // user interface // program language // registered time // additional detail Elements enterpriseAddtionalElements = doc.select( "section#overview div.content-group section.primary-content aside#additional-details section.content section.project-info"); for (int i = 0; i < 
enterpriseAddtionalElements.size(); i++) { Element element = enterpriseAddtionalElements.get(i); // System.out.println(element.html()); // System.out.println("*************************************"); Elements tags = element.select("header"); if (tags.size() > 0) { String tag = tags.text(); if (tag.equals("Languages")) { language = element.select("section.content").text(); } else if (tag.equals("Intended Audience")) { intendedAudience = element.select("section.content").text(); } else if (tag.equals("User Interface")) { userInterface = element.select("section.content").text(); } else if (tag.equals("Programming Language")) { programmingLanguage = element.select("section.content").text(); } else if (tag.equals("Registered")) { registeredTime = element.select("section.content").text(); } else if (tag.equals("Last Updated")) { lastUpdate = element.select("section.content").text(); } else if (tag.equals("Maintainers")) { maintainers = element.select("a").text(); } else if (tag.equals("License")) { license = element.select("section.content").text(); } else if (tag.equals("Categories")) { categories = element.select("a").text(); } else { } } } }