Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Documentation

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:net.meiolania.apps.habrahabr.fragments.hubs.loader.HubsLoader.java

/**
 * Downloads one page of the hub list and converts every {@code div.hub} element into a
 * {@link HubsData} entry.
 *
 * <p>The page URL is built by substituting the current {@code page} number for the
 * {@code %page%} placeholder in {@code url}.
 *
 * @return the hubs parsed from the page; empty when the page could not be fetched or contained
 *         no usable hubs (never {@code null})
 */
@Override
public ArrayList<HubsData> loadInBackground() {
    ArrayList<HubsData> data = new ArrayList<HubsData>();
    String readyUrl = url.replace("%page%", String.valueOf(page));

    try {
        Log.i(TAG, "Loading a page: " + readyUrl);

        Document document = Jsoup.connect(readyUrl).get();

        for (Element hub : document.select("div.hub")) {
            // first() yields null when the selector matched nothing, so every lookup below
            // has to be checked before it is dereferenced.
            Element title = hub.select("div.title > a").first();
            if (title == null) {
                // Without the title link there is neither a name nor a URL to show.
                Log.w(TAG, "Skipping a hub without a title link on " + readyUrl);
                continue;
            }
            Element index = hub.select("div.habraindex").first();
            Element stat = hub.select("div.stat").first();

            HubsData hubsData = new HubsData();
            hubsData.setTitle(title.text());
            hubsData.setUrl(title.attr("abs:href"));
            hubsData.setStat(stat != null ? stat.text() : "");
            hubsData.setIndex(index != null ? index.text() : "");

            data.add(hubsData);
        }
    } catch (IOException e) {
        // Still best-effort (whatever was parsed is returned), but the failure is no longer
        // silently discarded.
        Log.e(TAG, "Failed to load a page: " + readyUrl, e);
    }

    return data;
}

From source file:net.meiolania.apps.habrahabr.fragments.posts.loader.PostCommentsLoader.java

/**
 * Walks a comment tree depth first and appends one {@link CommentsData} per comment to
 * {@code commentsDatas}.
 *
 * <p>The reply selector is evaluated against all descendants of a comment, so a deeply
 * nested reply can be matched again from each of its ancestors. {@code containedComments}
 * remembers the ids that were already emitted so each comment is added only once.
 * TODO: the original author flagged this de-duplication as needing a rewrite.
 *
 * @param comments the comment elements of the current nesting level
 * @param level    nesting depth stored on every comment of this level; replies get
 *                 {@code level + 1}
 */
private void parseComments(Elements comments, int level) {
    for (Element comment : comments) {
        String commentId = comment.attr("id");
        if (containedComments.contains(commentId)) {
            continue;
        }
        containedComments.add(commentId);

        // first() returns null when nothing matched; fall back to empty strings instead of
        // crashing on a comment that lacks one of the parts.
        Element name = comment.select("a.username").first();
        Element message = comment.select("div.message").first();
        Element linkToComment = comment.select("a.link_to_comment").first();
        Element score = comment.select("span.score").first();

        CommentsData commentsData = new CommentsData();
        commentsData.setScore(score != null ? score.text() : "");
        commentsData.setUrl(linkToComment != null ? linkToComment.attr("abs:href") : "");
        commentsData.setAuthorUrl(name != null ? name.attr("abs:href") : "");
        commentsData.setAuthor(name != null ? name.text() : "");
        commentsData.setComment(message != null ? message.text() : "");
        commentsData.setLevel(level);

        commentsDatas.add(commentsData);

        Elements replyComments = comment.select("div.reply_comments > div.comment_item");

        parseComments(replyComments, level + 1);
    }
}

From source file:net.meiolania.apps.habrahabr.fragments.posts.loader.PostsLoader.java

/**
 * Downloads one page of the post list and converts every {@code div.post} element into a
 * {@link PostsData} entry.
 *
 * <p>The page URL is built by substituting the current {@code page} number for the
 * {@code %page%} placeholder in {@code url}.
 *
 * @return the posts parsed from the page; empty when the page could not be fetched or
 *         contained no usable posts (never {@code null})
 */
@Override
public ArrayList<PostsData> loadInBackground() {
    ArrayList<PostsData> data = new ArrayList<PostsData>();
    String readyUrl = url.replace("%page%", String.valueOf(page));

    try {
        Log.i(TAG, "Loading a page: " + readyUrl);

        Document document = Jsoup.connect(readyUrl).get();

        for (Element post : document.select("div.post")) {
            // first() yields null when the selector matched nothing; the title is mandatory,
            // everything else degrades to a default value.
            Element postTitle = post.select("a.post_title").first();
            if (postTitle == null) {
                Log.w(TAG, "Skipping a post without a title link on " + readyUrl);
                continue;
            }
            Element hubs = post.select("div.hubs").first();
            Element date = post.select("div.published").first();
            Element author = post.select("div.author > a").first();
            Element comments = post.select("div.comments > span.all").first();
            Element score = post.select("span.score").first();

            PostsData postsData = new PostsData();
            postsData.setTitle(postTitle.text());
            postsData.setUrl(postTitle.attr("abs:href"));
            postsData.setHubs(hubs != null ? hubs.text() : "");
            postsData.setDate(date != null ? date.text() : "");
            postsData.setAuthor(author != null ? author.text() : "");
            postsData.setComments(comments != null ? comments.text() : "0");
            postsData.setScore(score != null ? score.text() : "");

            data.add(postsData);
        }
    } catch (IOException e) {
        // Still best-effort (whatever was parsed is returned), but the failure is no longer
        // silently discarded.
        Log.e(TAG, "Failed to load a page: " + readyUrl, e);
    }

    return data;
}

From source file:net.meiolania.apps.habrahabr.fragments.qa.loader.QaCommentsLoader.java

/**
 * Downloads the Q&amp;A page at {@code url} and flattens its answers and their comments into
 * a single list.
 *
 * <p>Each {@code div.answer} becomes a level-0 entry, immediately followed by one level-1
 * entry per {@code div.comment_item} found inside it. Nested comments carry an empty score
 * and reuse the URL of the answer they belong to.
 *
 * @return the parsed answers and comments; empty when the page could not be fetched
 *         (never {@code null})
 */
@Override
public ArrayList<CommentsData> loadInBackground() {
    ArrayList<CommentsData> data = new ArrayList<CommentsData>();

    try {
        Document document = Jsoup.connect(url).get();

        for (Element answer : document.select("div.answer")) {
            // first() returns null when nothing matched; fall back to empty strings instead
            // of crashing on an answer that lacks one of the parts.
            Element name = answer.select("a.username").first();
            Element message = answer.select("div.message").first();
            Element linkToComment = answer.select("a.link_to_comment").first();
            Element score = answer.select("span.score").first();

            String answerUrl = linkToComment != null ? linkToComment.attr("abs:href") : "";

            CommentsData answerData = new CommentsData();
            answerData.setUrl(answerUrl);
            answerData.setAuthor(name != null ? name.text() : "");
            answerData.setAuthorUrl(name != null ? name.attr("abs:href") : "");
            answerData.setComment(message != null ? message.text() : "");
            answerData.setLevel(0);
            answerData.setScore(score != null ? score.text() : "");

            data.add(answerData);

            for (Element comment : answer.select("div.comment_item")) {
                Element commentAuthor = comment.select("span.info > a").first();
                Element commentText = comment.select("span.text").first();

                CommentsData commentData = new CommentsData();
                // The nested comment is given the answer's link, not one of its own.
                commentData.setUrl(answerUrl);
                commentData.setAuthorUrl(commentAuthor != null ? commentAuthor.attr("abs:href") : "");
                commentData.setAuthor(commentAuthor != null ? commentAuthor.text() : "");
                commentData.setComment(commentText != null ? commentText.text() : "");
                commentData.setLevel(1);
                commentData.setScore("");

                data.add(commentData);
            }
        }
    } catch (IOException e) {
        // Still best-effort (whatever was parsed is returned), but the failure is no longer
        // silently discarded.
        Log.e(TAG, "Failed to load a page: " + url, e);
    }

    return data;
}

From source file:net.meiolania.apps.habrahabr.fragments.qa.loader.QaLoader.java

/**
 * Downloads one page of the Q&amp;A list and converts every {@code div.post} element into a
 * {@link QaData} entry.
 *
 * <p>The page URL is built by substituting the current {@code page} number for the
 * {@code %page%} placeholder in {@code url}.
 *
 * @return the questions parsed from the page; empty when the page could not be fetched or
 *         contained no usable questions (never {@code null})
 */
@Override
public ArrayList<QaData> loadInBackground() {
    ArrayList<QaData> data = new ArrayList<QaData>();
    String readyUrl = url.replace("%page%", String.valueOf(page));

    try {
        Log.i(TAG, "Loading a page: " + readyUrl);

        Document document = Jsoup.connect(readyUrl).get();

        for (Element qa : document.select("div.post")) {
            // first() yields null when the selector matched nothing; the title is mandatory,
            // everything else degrades to an empty string.
            Element title = qa.select("a.post_title").first();
            if (title == null) {
                Log.w(TAG, "Skipping a question without a title link on " + readyUrl);
                continue;
            }
            Element hubs = qa.select("div.hubs").first();
            Element answers = qa.select("div.informative").first();
            Element date = qa.select("div.published").first();
            Element author = qa.select("div.author > a").first();
            Element score = qa.select("span.score").first();

            QaData qaData = new QaData();
            qaData.setTitle(title.text());
            qaData.setUrl(title.attr("abs:href"));
            qaData.setHubs(hubs != null ? hubs.text() : "");
            qaData.setAnswers(answers != null ? answers.text() : "");
            qaData.setDate(date != null ? date.text() : "");
            qaData.setAuthor(author != null ? author.text() : "");
            qaData.setScore(score != null ? score.text() : "");

            data.add(qaData);
        }
    } catch (IOException e) {
        // Still best-effort (whatever was parsed is returned), but the failure is no longer
        // silently discarded.
        Log.e(TAG, "Failed to load a page: " + readyUrl, e);
    }

    return data;
}

From source file:net.meiolania.apps.habrahabr.fragments.users.loader.UsersLoader.java

/**
 * Downloads the user list at {@code url} and converts every {@code div.user} element into a
 * {@link UsersData} entry.
 *
 * @return the users parsed from the page; empty when the page could not be fetched or
 *         contained no usable users (never {@code null})
 */
@Override
public ArrayList<UsersData> loadInBackground() {
    ArrayList<UsersData> data = new ArrayList<UsersData>();

    try {
        Log.i(TAG, "Loading a page: " + url);

        Document document = Jsoup.connect(url).get();

        for (Element user : document.select("div.user")) {
            // first() yields null when the selector matched nothing; the name link is
            // mandatory, everything else degrades to an empty string.
            Element name = user.select("div.userlogin > div.username > a").first();
            if (name == null) {
                Log.w(TAG, "Skipping a user without a name link on " + url);
                continue;
            }
            Element rating = user.select("div.rating").first();
            Element karma = user.select("div.karma").first();
            Element avatar = user.select("div.avatar > a > img").first();
            Element lifetime = user.select("div.info > div.lifetime").first();

            UsersData usersData = new UsersData();
            usersData.setName(name.text());
            usersData.setUrl(name.attr("abs:href"));
            usersData.setRating(rating != null ? rating.text() : "");
            usersData.setKarma(karma != null ? karma.text() : "");
            usersData.setAvatar(avatar != null ? avatar.attr("src") : "");
            usersData.setLifetime(lifetime != null ? lifetime.text() : "");

            data.add(usersData);
        }
    } catch (IOException e) {
        // Still best-effort (whatever was parsed is returned), but the failure is no longer
        // silently discarded.
        Log.e(TAG, "Failed to load a page: " + url, e);
    }

    return data;
}

From source file:net.thetabx.gcd.activity.ChatActivity.java

/**
 * Parses a chat HTML response into {@code messages} and shows the message count in
 * {@code header}.
 *
 * <p>Every direct {@code div} child of a {@code .mChatHover} element is treated as one
 * message built from its first {@code a}, its first {@code span} and the second {@code div}
 * matched from the block. Blocks that lack any of these parts are skipped instead of
 * aborting the whole parse with an {@link IndexOutOfBoundsException}.
 *
 * @param response raw HTML of the chat page
 */
private void parse(String response) {
    Document html = Jsoup.parse(response);
    Elements blocks = html.select(".mChatHover>div");
    messages = new ArrayList<ChatMessage>();

    for (Element block : blocks) {
        Elements links = block.select("a");
        Elements spans = block.select("span");
        // select() may match the context element itself, so index 0 is expected to be the
        // block and index 1 its first nested div.
        Elements divs = block.select("div");

        if (links.isEmpty() || spans.isEmpty() || divs.size() < 2) {
            continue;
        }

        messages.add(new ChatMessage(links.get(0), spans.get(0), divs.get(1)));
    }
    header.setText(String.format("Got %d messages", messages.size()));
}

From source file:net.trustie.model.OpenHubProject_Model.java

/**
 * Reads the "quick reference" definition list of a project page and stores the recognised
 * entries on this model.
 *
 * <p>The {@code dt} elements supply the entry names and the {@code dd} elements the values;
 * they are paired by position. Recognised names are {@code Organization:},
 * {@code Project Links:}, {@code Code Locations:}, {@code Licenses:},
 * {@code Similar Projects:} and {@code Managers:}; anything else is ignored. Links are stored
 * as {@code text + Seperator.SOURCE_SEPERATOR + href}, and multiple links are joined with
 * {@code Seperator.OSSEAN_SEPERATOR}.
 *
 * @param quickRef the element that contains the {@code dt}/{@code dd} pairs
 */
private void handleQuickRef(Element quickRef) {
    Elements itemNames = quickRef.select("dt");
    Elements itemValues = quickRef.select("dd");
    // A malformed list can contain a <dt> without a matching <dd>; only walk complete pairs
    // instead of running off the end of the shorter list.
    int pairCount = Math.min(itemNames.size(), itemValues.size());
    for (int i = 0; i < pairCount; i++) {
        String refName = itemNames.get(i).text();
        Element eValue = itemValues.get(i);
        switch (refName) {
        case "Organization:": {
            this.organization = eValue.text();
            break;
        }
        case "Project Links:": {
            Elements links = eValue.select("a");
            String[] tmp = new String[links.size()];
            for (int j = 0; j < links.size(); j++) {
                Element ele = links.get(j);
                tmp[j] = ele.text() + Seperator.SOURCE_SEPERATOR + ele.attr("href");
            }
            this.projectLinks = StringUtils.join(tmp, Seperator.OSSEAN_SEPERATOR);
            break;
        }
        case "Code Locations:": {
            Elements locs = eValue.select("a");
            if (locs.isEmpty()) {
                // No link present: keep the plain text of the entry.
                this.codeLocation = eValue.text();
            } else {
                Element link = locs.get(0);
                this.codeLocation = link.text() + Seperator.SOURCE_SEPERATOR + link.attr("href");
            }
            break;
        }
        case "Licenses:": {
            Elements links = eValue.select("a");
            List<String> listLicenses = new ArrayList<String>();
            for (Element link : links) {
                listLicenses.add(link.text());
            }
            this.licenses = StringHandler.combineTags(listLicenses);
            break;
        }
        case "Similar Projects:": {
            Elements projects = eValue.select("td[width=49%]");
            List<String> entries = new ArrayList<String>();
            for (Element cell : projects) {
                // first() is null for a cell without a link; skip it rather than throwing.
                Element project = cell.select("a").first();
                if (project == null) {
                    continue;
                }
                entries.add(project.text() + Seperator.SOURCE_SEPERATOR + project.attr("href"));
            }
            this.similarProjects = StringUtils.join(entries.toArray(new String[entries.size()]),
                    Seperator.OSSEAN_SEPERATOR);
            break;
        }
        case "Managers:": {
            // The placeholder shown for unmanaged projects ends with the project's own name;
            // the original compared against one specific project ("BugSystem") only.
            // NOTE(review): prefix inferred from that single literal - confirm against live pages.
            if (eValue.text().startsWith("Become the first manager for")) {
                break;
            }
            Elements users = eValue.select("a");
            String[] tmp = new String[users.size()];
            for (int j = 0; j < users.size(); j++) {
                Element ele = users.get(j);
                tmp[j] = ele.text() + Seperator.SOURCE_SEPERATOR + ele.attr("href");
            }
            this.managers = StringUtils.join(tmp, Seperator.OSSEAN_SEPERATOR);
            break;
        }
        default: {
            break;
        }
        }
    }
}

From source file:net.trustie.model.SFProject_Model.java

/**
 * Post-processes a crawled project page: records bookkeeping data (URL, URL hash, collect
 * time, page hash) and then fills or normalises the content fields from the page body.
 *
 * <p>When the {@code body} element has the id {@code pg_project}, its class attribute picks
 * the layout-specific extractor ("bluesteel user" or "enterprise user"); relative "... ago"
 * timestamps are then converted through {@code getTime} and empty timestamps are replaced by
 * {@code 0000-00-00 00:00:00}. For any other body, name, description and features are read
 * straight from the legacy layout selectors. In both cases the two timestamps are finally
 * passed through {@code DateHandler.formatAllTypeDate}. A document without a {@code body}
 * element only gets the bookkeeping data.
 *
 * @param page the crawled page
 */
public void afterProcess(Page page) {
    this.url = page.getPageUrl();
    this.urlMd5 = DigestUtils.md5Hex(page.getPageUrl());
    this.collectTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    // Hash is taken over the field values as they are on entry, before the body is examined.
    this.pageMd5 = DigestUtils.md5Hex(urlMd5 + lastUpdate + feature + downloadCount + stars);

    Document doc = page.getHtml().getDocument();
    Elements bodies = doc.select("body");
    if (bodies.size() == 0) {
        return;
    }

    Element body = bodies.get(0);
    if ("pg_project".equals(body.attr("id"))) {
        String userType = body.attr("class");
        if (userType.equals("bluesteel user")) {
            extractPageBluesteelUser(doc);
        } else if (userType.equals("enterprise user")) {
            extractPageEnterpriseUser(doc);
        }
        // any other class: nothing layout-specific to extract

        if (lastUpdate.contains("ago")) {
            this.lastUpdate = getTime(lastUpdate);
        }
        if (registeredTime.contains("ago")) {
            this.registeredTime = getTime(registeredTime);
        }

        if (lastUpdate.equals("")) {
            this.lastUpdate = "0000-00-00 00:00:00";
        }
        if (registeredTime.equals("")) {
            this.registeredTime = "0000-00-00 00:00:00";
        }
    } else {
        this.name = body.select("div#proj_header div.proj-title h2").text();
        this.desc = body.select("div#top_left div#home_intro div#proj-overview p").text();
        this.feature = body.select("div#top_left div#home_intro div#proj-overview ul").text();
    }

    this.lastUpdate = DateHandler.formatAllTypeDate(lastUpdate, page.getTime());
    this.registeredTime = DateHandler.formatAllTypeDate(registeredTime, page.getTime());
}

From source file:net.trustie.model.SFProject_Model.java

/**
 * Fills this model from a project page that uses the "enterprise user" layout.
 *
 * <p>Reads, in order: project name, star rating (added onto the current {@code stars} value:
 * 1 per {@code rating star} span, 0.5 per {@code rating star_half} span), download count
 * (digits only), last-update timestamp, platform, description and feature text, and finally
 * the "additional details" sections, each of which is routed to a field by its header text.
 *
 * @param doc the parsed project page
 */
private void extractPageEnterpriseUser(Document doc) {
    name = doc.select("div#project-header div.content-group h1.project-name a").text();

    Elements ratingSpans = doc
            .select("div#project-header div.content-group div.project-rating span:not(.rating-count)");
    for (Element ratingSpan : ratingSpans) {
        switch (ratingSpan.attr("class")) {
        case "rating star":
            stars += 1;
            break;
        case "rating star_half":
            stars += 0.5;
            break;
        default:
            // any other class contributes nothing to the rating
            break;
        }
    }

    Elements downloadStats = doc
            .select("div#project-header div.content-group div.group a.download-stats span.data");
    if (downloadStats.size() > 0) {
        // keep only the digits of the displayed counter
        this.downloadCount = downloadStats.get(0).text().replaceAll("[^\\d]", "");
    }

    Elements updatedTimes = doc
            .select("div#project-header div.content-group div.project-rating time.dateUpdated");
    if (updatedTimes.size() > 0) {
        lastUpdate = updatedTimes.get(0).attr("datetime");
    }

    platform = doc.select("div#project-header div.content-group div.download-os").text();

    desc = doc
            .select("section#overview div.content-group section.primary-content p#project-description")
            .text();

    feature = doc.select(
            "section#overview div.content-group section.primary-content section#project-features div[class=content editable]")
            .text();

    Elements infoSections = doc.select(
            "section#overview div.content-group section.primary-content aside#additional-details section.content section.project-info");
    for (Element section : infoSections) {
        Elements headers = section.select("header");
        if (headers.size() == 0) {
            continue;
        }

        // The header text names the field this section feeds; unknown headers are ignored.
        switch (headers.text()) {
        case "Languages":
            language = section.select("section.content").text();
            break;
        case "Intended Audience":
            intendedAudience = section.select("section.content").text();
            break;
        case "User Interface":
            userInterface = section.select("section.content").text();
            break;
        case "Programming Language":
            programmingLanguage = section.select("section.content").text();
            break;
        case "Registered":
            registeredTime = section.select("section.content").text();
            break;
        case "Last Updated":
            lastUpdate = section.select("section.content").text();
            break;
        case "Maintainers":
            maintainers = section.select("a").text();
            break;
        case "License":
            license = section.select("section.content").text();
            break;
        case "Categories":
            categories = section.select("a").text();
            break;
        default:
            break;
        }
    }
}