Example usage for org.jsoup.select Elements first

List of usage examples for org.jsoup.select Elements first

Introduction

This page collects example usages of the first() method of org.jsoup.select.Elements.

Prototype

public Element first() 

Source Link

Document

Get the first matched element.

Usage

From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java

/**
 * Grab contents from the forum that the user clicked on.
 *
 * Updates thisPage / finalPage from the page navigation block when the
 * document has one, then walks every row of the "threadslist" table and
 * appends a ThreadModel to threadlist for each row that yields a title.
 * A row without a title whose text contains the "no update" marker string
 * either adds a marker ThreadModel or stops the scan, depending on the
 * user's "hide old posts" preference. Rows that fail to parse are logged
 * and skipped.
 *
 * @param doc      The document parsed from the link
 * @param id       The id number of the link
 * @param isMarket True if the link is from a marketplace category
 */
public void getCategoryContents(Document doc, String id, boolean isMarket) {

    // Update pagination. A listing without a pagenav block leaves thisPage
    // and finalPage untouched instead of relying on a swallowed exception.
    try {
        Element pageNav = doc.select("div[class=pagenav]").first();
        if (pageNav != null) {
            // presumably the control text reads "Page X of Y" — TODO confirm
            String[] pageTokens = pageNav.select("td[class^=vbmenu_control]").text().split(" ");
            if (pageTokens.length > 1)
                thisPage = pageTokens[1];
            if (pageTokens.length > 3)
                finalPage = pageTokens[3];
        }
    } catch (Exception e) {
        Log.e(TAG, "We had an error with pagination", e);
    }

    // Make sure id contains only numbers
    // NOTE(review): id is not read again in this method, so this assignment
    // has no visible effect here.
    if (!isNewTopicActivity)
        id = Utils.parseInts(id);

    // Grab each thread
    Elements threadListing = doc.select("table[id=threadslist] > tbody > tr");

    for (Element thread : threadListing) {
        try {
            boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false,
                    isPoll = false;
            String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "",
                    lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = "";

            Elements announcementContainer = thread.select("td[colspan=5]");
            Elements threadTitleContainer = thread.select("a[id^=thread_title]");

            // We could have two different types of threads.  Announcement threads are
            // completely different than the other types of threads (sticky, locked, etc)
            // so we need to play some games here
            if (!announcementContainer.isEmpty()) {
                Log.d(TAG, "Announcement Thread Found");

                Elements annThread = announcementContainer.select("div > a");
                Elements annUser = announcementContainer.select("div > span[class=smallfont]");
                formattedTitle = "Announcement: " + annThread.first().text();
                threadUser = annUser.last().text();
                threadLink = annThread.attr("href");
                isAnnounce = true;
            } else if (!threadTitleContainer.isEmpty()) {
                // Non-empty, so first() cannot be null here
                Element threadLinkEl = threadTitleContainer.first();
                Element repliesText = thread.select("td[title^=Replies]").first();
                Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first();
                Element threadicon = thread.select("img[id^=thread_statusicon_]").first();
                Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first();
                Element threadDateFull = thread.select("td[title^=Replies:] > div").first();

                // Everything derived from the title div is optional: when the
                // div is missing the defaults declared above are kept.
                String preString = "";
                if (threadDiv != null) {
                    String divText = threadDiv.text();
                    isSticky = divText.contains("Sticky:");
                    isPoll = divText.contains("Poll:");
                    preString = threadDiv.select("span > b").text();
                    hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty();

                    // Find the last page if it exists
                    Element lastSpan = threadDiv.select("span").last();
                    Element lastPageLink = lastSpan == null ? null : lastSpan.select("a").last();
                    if (lastPageLink != null)
                        lastPage = lastPageLink.attr("href");
                }

                if (threadicon != null) {
                    String icSt = threadicon.attr("src");
                    isLocked = icSt.contains("lock") && icSt.endsWith(".gif");
                }

                // A row without a date cell or a status icon cannot be parsed: the
                // resulting NullPointerException is handled by the per-thread catch
                // below, which logs the problem and skips the row.
                // Keep the text up to and including the first 'M' (presumably the
                // AM/PM marker — TODO confirm); without an 'M' the date becomes empty.
                threadDate = threadDateFull.text();
                int findAMPM = threadDate.indexOf("M") + 1;
                threadDate = threadDate.substring(0, findAMPM);

                String totalPostsInThreadTitle = threadicon.attr("alt");

                if (totalPostsInThreadTitle.length() > 0)
                    totalPosts = totalPostsInThreadTitle.split(" ")[2];

                // Remove page from the link
                String realLink = Utils.removePageFromLink(link);

                if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) {

                    // The title attribute is split into at most four tokens; token 1
                    // (minus its trailing character) is the reply count and token 3
                    // is the view count.
                    String txt = repliesText.getElementsByClass("alt2").attr("title");
                    String[] splitter = txt.split(" ", 4);

                    postCount = splitter[1].substring(0, splitter[1].length() - 1);
                    views = splitter[3];

                    if (isNewTopicActivity) {
                        Element forumCell = thread.select("td[class=alt1]").last();
                        if (forumCell != null)
                            forum = forumCell.text();
                    }

                    formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "",
                            preString.length() == 0 ? "" : preString + " ", threadLinkEl.text());
                }

                threadUser = threaduser.text();
                lastUser = repliesText.select("a[href*=members]").text();
                threadLink = threadLinkEl.attr("href");
            }

            // Add our thread to our list as long as the thread
            // contains a title
            if (!formattedTitle.equals("")) {
                ThreadModel tv = new ThreadModel();
                tv.setTitle(formattedTitle);
                tv.setStartUser(threadUser);
                tv.setLastUser(lastUser);
                tv.setLink(threadLink);
                tv.setLastLink(lastPage);
                tv.setPostCount(postCount);
                tv.setMyPosts(totalPosts);
                tv.setViewCount(views);
                tv.setLocked(isLocked);
                tv.setSticky(isSticky);
                tv.setAnnouncement(isAnnounce);
                tv.setPoll(isPoll);
                tv.setHasAttachment(hasAttachment);
                tv.setForum(forum);
                tv.setLastPostTime(threadDate);
                threadlist.add(tv);
            } else if (thread.text()
                    .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) {
                Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size()));
                if (threadlist.size() > 0) {
                    ThreadModel ltv = threadlist.get(threadlist.size() - 1);
                    Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle()));
                }

                if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext()))
                    threadlist.add(new ThreadModel(true));
                else {
                    Log.d(TAG, "User Chose To Hide Old Threads");
                    break;
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "Error Parsing That Thread...", e);
            Log.d(TAG, "Thread may have moved");
        }
    }
}

From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageViewFragment.java

/**
 * Construct the view elements.
 *
 * Starts an AsyncTask that shows a progress dialog, downloads the private
 * message page named by the fragment's "link" argument, extracts the form
 * tokens (securitytoken, pmid, title) and the sender/post details into a
 * PMPostModel appended to pmlist, pre-fills the comment box from the
 * quick-reply textarea when the page has one, calls updateList() and
 * finally dismisses the dialog.
 */
private void constructView() {
    AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() {
        @Override
        protected void onPreExecute() {

            loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading),
                    getString(R.string.pleaseWait), true);
        }

        @Override
        protected Void doInBackground(Void... params) {
            String link = getArguments().getString("link");
            Document doc = VBForumFactory.getInstance().get(getActivity(),
                    VBForumFactory.getRootAddress() + "/" + link);

            if (doc == null)
                return null;

            securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken");

            pmid = HtmlFormUtils.getInputElementValueByName(doc, "pmid");

            title = HtmlFormUtils.getInputElementValueByName(doc, "title");

            Elements userPm = doc.select("table[id^=post]");
            publishProgress(getString(R.string.asyncDialogLoadingPM));

            // User Control Panel
            Elements userCp = userPm.select("td[class=alt2]");
            Elements userDetail = userCp.select("div[class=smallfont]");
            Elements userAvatar = userDetail.select("img[alt$=Avatar]");
            Elements postMessage = doc.select("div[id=post_message_]");

            PMPostModel pv = new PMPostModel();
            pv.setUserName(userCp.select("div[id^=postmenu]").text());
            pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn()
                    && UserProfile.getInstance().getUsername().equals(pv.getUserName()));

            // first()/last() return null on an empty selection; a page without
            // the expected markup must not crash the background task.
            Element userTitle = userDetail.first();
            if (userTitle != null)
                pv.setUserTitle(userTitle.text());
            pv.setUserImageUrl(Utils.resolveUrl(userAvatar.attr("src")));
            Element postDate = userPm.select("td[class=thead]").first();
            if (postDate != null)
                pv.setPostDate(postDate.text());

            // userSubDetail
            // 0 - full container , full container
            // 1 - Trader Score   , Trader Score
            // 2 - Join Date      , Join Date
            // 3 - Post Count     , Location
            // 4 - Blank          , Post Count
            // 5 -                , Blank || Social
            //
            Element lastDetail = userDetail.last();
            if (lastDetail != null) {
                for (Element subDetail : lastDetail.select("div")) {
                    String txt = subDetail.text();
                    if (txt.contains("Location:"))
                        pv.setUserLocation(txt);
                    else if (txt.contains("Posts:"))
                        pv.setUserPostCount(txt);
                    else if (txt.contains("Join Date:"))
                        pv.setJoinDate(txt);
                }
            }

            // User Post Content
            pv.setUserPost(formatUserPost(postMessage));

            pmlist.add(pv);

            // select() never returns null, so test the element itself: the
            // quick-reply textarea is absent on some pages.
            // NOTE(review): this touches a View from doInBackground, which
            // AsyncTask runs off the UI thread — consider moving the setText
            // call to onPostExecute.
            Element quickReply = doc.select("textarea[id=vB_Editor_QR_textarea]").first();
            if (quickReply != null) {
                TextView comment = (TextView) getView().findViewById(R.id.pmitem_comment);
                comment.setText(quickReply.text());
            }

            updateList();
            return null;
        }

        @Override
        protected void onProgressUpdate(String... progress) {
            if (loadingDialog != null)
                loadingDialog.setMessage(progress[0]);
        }

        @Override
        protected void onPostExecute(Void result) {
            if (loadingDialog == null)
                return;
            try {
                loadingDialog.dismiss();
                loadingDialog = null;
            } catch (Exception e) {
                // Pass the throwable itself: e.getMessage() may be null, and
                // Log.w rejects a null message.
                Log.w(TAG, "Unable to dismiss the loading dialog", e);
            }
        }
    };
    updaterTask.execute();
}

From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java

/**
 * Grab contents from the thread that the user clicked on.
 *
 * Updates thisPage / finalPage when the page has a navigation block, removes
 * the admin header view when the thread tools menu does not mention the
 * moderation tools, records the logged-in user id, the security token and
 * the post / thread numbers, and appends one PostModel per parsed post to
 * postlist. Posts that fail to parse are logged and skipped.
 *
 * @param doc   The document parsed from the link
 */
public void getThreadContents(Document doc) {
    // Update pagination
    try {
        Element pageNav = doc.select("div[class=pagenav]").first();
        if (pageNav != null) {
            // presumably the control text reads "Page X of Y" — TODO confirm
            String[] pageTokens = pageNav.select("td[class^=vbmenu_control]").text().split(" ");
            thisPage = pageTokens[1];
            finalPage = pageTokens[3];
            Log.d(TAG, String.format("This Page: %s, Final Page: %s", thisPage, finalPage));
        } else {
            Log.d(TAG, "Thread only contains one page");
        }
    } catch (Exception e) {
        Log.e(TAG, "We had an error with pagination", e);
    }

    // Is user thread admin??
    Elements threadTools = doc.select("div[id=threadtools_menu] > form > table");
    if (threadTools.text().contains(MODERATION_TOOLS)) {
        Log.d(TAG, "<><> User has administrative rights here! <><>");
    } else {
        //adminContent.setVisibility(View.GONE);
        lv.removeHeaderView(adminContent);
    }

    // Get the user's actual ID, there is a chance they never got it
    // before
    UserProfile.getInstance().setUserId(HtmlFormUtils.getInputElementValueByName(doc, "loggedinuser"));

    // Get Post Number and security token
    securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken");

    // The post number is whatever follows the last '=' of the reply link;
    // with no such link attr() yields "" and so does the substring.
    Elements pNumber = doc.select("a[href^=http://www.rx8club.com/newreply.php?do=newreply&noquote=1&p=]");
    String pNumberHref = pNumber.attr("href");
    postNumber = pNumberHref.substring(pNumberHref.lastIndexOf("=") + 1);
    threadNumber = doc.select("input[name=searchthreadid]").attr("value");

    Elements posts = doc.select("div[id=posts]").select("div[id^=edit]");
    Log.v(TAG, String.format("Parsing through %d posts", posts.size()));
    for (Element post : posts) {
        try {
            Elements innerPost = post.select("table[id^=post]");

            // User Control Panel
            // A post without the user detail block makes last() return null;
            // the resulting exception is handled by the catch below, which
            // skips the post.
            Elements userCp = innerPost.select("td[class=alt2]");
            Elements userDetail = userCp.select("div[class=smallfont]");
            Elements userSubDetail = userDetail.last().select("div");
            Elements userAvatar = userDetail.select("img[alt$=Avatar]");

            // User Information
            PostModel pv = new PostModel();
            pv.setUserName(userCp.select("div[id^=postmenu]").text());
            pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn()
                    && UserProfile.getInstance().getUsername().equals(pv.getUserName()));
            pv.setUserTitle(userDetail.first().text());
            pv.setUserImageUrl(userAvatar.attr("src"));
            pv.setPostDate(innerPost.select("td[class=thead]").first().text());
            pv.setPostId(Utils.parseInts(post.attr("id")));
            pv.setRootThreadUrl(currentPageLink);

            // get Likes if any exist
            Elements eLikes = innerPost.select("div[class*=vbseo_liked] > a");
            List<String> likes = new ArrayList<String>();
            for (Element eLike : eLikes)
                likes.add(eLike.text());
            pv.setLikes(likes);

            for (Element subDetail : userSubDetail) {
                String txt = subDetail.text();
                if (txt.contains("Location:"))
                    pv.setUserLocation(txt);
                else if (txt.contains("Posts:"))
                    pv.setUserPostCount(txt);
                else if (txt.contains("Join Date:"))
                    pv.setJoinDate(txt);
            }

            // User Post Content
            pv.setUserPost(formatUserPost(innerPost));

            // User signature (optional)
            Element userSig = innerPost.select("div[class=konafilter]").first();
            if (userSig != null)
                pv.setUserSignature(userSig.html());

            Elements postAttachments = innerPost.select("a[id^=attachment]");
            if (!postAttachments.isEmpty()) {
                ArrayList<String> attachments = new ArrayList<String>();
                for (Element postAttachment : postAttachments) {
                    attachments.add(postAttachment.attr("href"));
                }
                pv.setAttachments(attachments);
            }

            pv.setSecurityToken(securityToken);

            // Make sure we aren't adding a blank user post
            if (pv.getUserPost() != null)
                postlist.add(pv);
        } catch (Exception e) {
            Log.w(TAG, "Error Parsing Post...Probably Deleted", e);
        }
    }
}

From source file:com.rickendirk.rsgwijzigingen.ZoekService.java

/**
 * Copies the text of the first table row matched in the page onto the
 * given Wijzigingen as its message; does nothing when no row matches.
 *
 * @param wijzigingen receiver of the message
 * @param doc         parsed page to read the message row from
 */
private void addMessage(Wijzigingen wijzigingen, Document doc) {
    Elements firstRows = doc.select("body > div > div > div > table > tbody > tr:nth-child(1)");
    if (firstRows.isEmpty()) {
        return;
    }
    wijzigingen.setMessage(firstRows.first().text());
}

From source file:im.ene.lab.attiq.ui.activities.ProfileActivity.java

/**
 * Reads the "Contribution" counter out of the activity-chart stats block of
 * the event's document and, when it parses as an integer, stores it on the
 * state and posts a StateEvent. Events without a document, without a stats
 * block or without a parsable counter are ignored.
 *
 * @param event event carrying the parsed document
 */
@SuppressWarnings("unused")
public void onEventMainThread(DocumentEvent event) {
    if (event.document == null) {
        return;
    }

    Elements stats = event.document.getElementsByClass("userActivityChart_stats");
    if (UIUtil.isEmpty(stats)) {
        return;
    }
    Element statBlock = stats.first();
    if (statBlock == null) {
        return;
    }

    Integer contribution = null;
    for (Element stat : statBlock.children()) {
        String unit = stat.getElementsByClass("userActivityChart_statUnit").text().trim();
        if (!"Contribution".equals(unit)) {
            continue;
        }
        // Only the first "Contribution" entry is considered, parsable or not
        try {
            String count = stat.getElementsByClass("userActivityChart_statCount").text();
            contribution = Integer.valueOf(count);
        } catch (NumberFormatException er) {
            er.printStackTrace();
        }
        break;
    }

    if (contribution == null) {
        return;
    }
    ((State) mState).contribution = contribution;
    EventBus.getDefault().post(new StateEvent<>(getClass().getSimpleName(), true, null, mState));
}

From source file:org.aankor.animenforadio.api.WebsiteGate.java

/**
 * Parses a "now playing" HTML fragment and replaces the current song state.
 *
 * The first ".span6" block is matched against mainNowPlayingPattern to build
 * the SongInfo; the second one supplies the song id, timers, rating and
 * favourite count. The fields currentSong, currentSongPos and
 * currentSongEndTime are only written once every value has been parsed, so
 * a failure part-way through leaves the previous state intact.
 *
 * @param nowPlaying HTML of the now-playing widget
 * @return true when a song was parsed and stored; false when the markup has
 *         fewer than two ".span6" blocks or the first one does not match the
 *         pattern (the current song is unset in both cases)
 * @throws NumberFormatException if one of the numeric attributes is missing
 *         or not a number
 */
private boolean updateNowPlaying(String nowPlaying) {
    Document doc = Jsoup.parse(nowPlaying);

    Elements spans = doc.select("div .float-container .row .span6");

    // Both blocks are required below; treat incomplete markup like an
    // unrecognised song instead of failing on first() / get(1).
    if (spans.size() < 2) {
        unsetCurrentSong();
        return false;
    }

    Matcher matcher = mainNowPlayingPattern.matcher(spans.first().text());

    if (!matcher.find()) {
        unsetCurrentSong();
        return false;
    }
    SongInfo newSongInfo = new SongInfo(matcher.group(1), matcher.group(2), matcher.group(3), matcher.group(4),
            matcher.group(5), matcher.group(6));

    Elements e = doc.select("div img");
    if (!e.isEmpty()) {
        String artUrl = e.attr("src");
        newSongInfo.setArtUrl(artUrl);
    } else
        newSongInfo.unsetArtUrl();

    // Second block only; eq(1) keeps it as an Elements so the lookups below
    // do not repeat spans.get(1).
    Elements details = spans.eq(1);

    newSongInfo.setSongId(Integer.valueOf(details.select("a[data-songinfo]").attr("data-songinfo")));

    Elements positionTimer = details.select("#np_timer");
    int songPosTime = Integer.parseInt(positionTimer.attr("rel"));
    String songPosTimeStr = positionTimer.text();

    Elements durationTimer = details.select("#np_time");
    newSongInfo.setDuration(Integer.valueOf(durationTimer.attr("rel")));
    newSongInfo.setDurationStr(durationTimer.text());

    matcher = raitingNowPlayingPattern.matcher(details.html());

    if (matcher.find())
        newSongInfo.setRating(matcher.group(1));
    else
        newSongInfo.unsetRating();

    newSongInfo.setFavourites(Integer.valueOf(details
            .select(".favourite-container span[data-favourite-count]").attr("data-favourite-count")));

    matcher = nowPlayingBarPattern.matcher(doc.select("#nowPlayingBar").attr("style"));
    double nowPlayingPos = 0.0;
    if (matcher.find())
        nowPlayingPos = Double.parseDouble(matcher.group(1));

    // Reuse the already decoded artwork when the art URL did not change.
    // presumably getArtUrl() is null after unsetArtUrl() — hence the null check.
    String newArtUrl = newSongInfo.getArtUrl();
    if ((currentSong != null) && (newArtUrl != null) && newArtUrl.equals(currentSong.getArtUrl()))
        newSongInfo.setArtBmp(currentSong.getArtBmp(), currentSong.getMiniArtBmp());

    // Commit the new state in one go, after all parsing succeeded
    currentSongEndTime = System.currentTimeMillis() + songPosTime * 1000L;
    currentSong = newSongInfo;
    currentSongPos = new SongPos(songPosTime, songPosTimeStr, nowPlayingPos);
    return true;
}

From source file:org.aliuge.crawler.extractor.selector.AbstractElementCssSelector.java

/**
 * ????/*from  w w  w .ja  v  a 2s . c  om*/
 * @param elements
 * @return
 */
protected String getExtractText(Elements elements) {
    if (elements.size() == 0)
        return null;
    String temp = "";

    if (attr.equalsIgnoreCase("tostring")) {
        return temp = elements.toString();
    } else {
        if (index == -1 && StringUtils.isNotBlank(this.regex)) {
            for (Element e : elements) {
                Element element = e;
                if (element.select(this.regex).size() > 0) {
                    return temp = e.text();
                }
            }
            return temp;
        } else {
            if (index > -1 && index < elements.size()) {
                return elements.get(index).text();
            }
        }
        return elements.first().text();
    }

    /*if(attr.equals("tostring")){
       if(index==0 || index>elements.size())
    temp = elements.first().toString();
       else
    temp = elements.get(index).toString();
    }else{
       if(index==0 || index>elements.size())
    temp = elements.first().text();
       else
    temp = elements.get(index).text();
    }
            
    if(null!=pattern){
       Matcher m = pattern.matcher(temp);
       if(m.find()){
    temp = m.group(1);
       }
    }*/
    //return temp;
}

From source file:org.aliuge.crawler.extractor.selector.AbstractElementCssSelector.java

/**
 * Extracts an attribute value from the matched elements according to this
 * selector's index and regex settings.
 *
 * <ul>
 * <li>attr equal to "tostring" (ignoring case): the value returned by
 * {@code elements.attr(attr)};</li>
 * <li>index == -1 with a non-blank regex: the attribute of the first element
 * inside which {@code regex}, used as a select query, matches something,
 * or "" when none does;</li>
 * <li>index within range: the attribute of the element at that index;</li>
 * <li>otherwise: the attribute of the first element, or "" when elements is
 * empty.</li>
 * </ul>
 *
 * @param elements matched elements to extract from
 * @param attr     name of the attribute to read
 * @return the attribute value, or "" when nothing matched
 */
protected String getExtractAttr(Elements elements, String attr) {
    // NOTE(review): unlike getExtractText, the "tostring" case here reads an
    // attribute literally named "tostring" — confirm this is intended.
    if (attr.equalsIgnoreCase("tostring")) {
        return elements.attr(attr);
    }

    if (index == -1 && StringUtils.isNotBlank(this.regex)) {
        for (Element e : elements) {
            if (e.select(this.regex).size() > 0) {
                return e.attr(attr);
            }
        }
        return "";
    }

    if (index > -1 && index < elements.size()) {
        return elements.get(index).attr(attr);
    }

    // first() is null for an empty list; answer "" like the other
    // nothing-matched paths instead of throwing.
    Element first = elements.first();
    return first == null ? "" : first.attr(attr);
}

From source file:org.asqatasun.crawler.CrawlerImpl.java

/**
 * Waiting for a better implementation, we parse here the html content
 * to detect the presence of the rel=canonical property.
 *
 * The canonical href and the current url are compared after removing a
 * trailing "/" and everything up to and including the first "//". A
 * canonical href starting with "/" is first made absolute, using the
 * page's base element when there is one and the scheme and host of the
 * current url otherwise.
 *
 * @param content the content to inspect
 * @return whether the current page defines a rel canonical Url and whether
 * this url is different from the current url. Always false when the
 * exclusion is disabled, the content is not an SSP, its source is blank,
 * the page has zero or several canonical links, or the href is ".".
 */
public final boolean isRelCanonicalPage(Content content) {
    // @TODO make this implementation cleaner
    if (!excludeRelCanonical) {
        return false;
    }
    if (!(content instanceof SSP)) {
        return false;
    }
    String source = ((SSP) content).getSource();
    if (StringUtils.isBlank(source)) {
        return false;
    }
    Elements relCanonical = Jsoup.parse(source).select(REL_CANONICAL_CSS_LIKE_QUERY);
    if (relCanonical.size() != 1) {
        return false;
    }
    // At this step, we are sure that the rel canonical is defined and
    // is unique
    String href = relCanonical.first().attr("href");
    if (href.equals(".")) {
        return false;
    }
    if (href.endsWith("/")) {
        href = href.substring(0, href.length() - 1);
    }
    if (href.startsWith("/")) {
        // Query the document that was already parsed above rather than
        // parsing the same source a second time.
        Elements base = relCanonical.first().ownerDocument().select(BASE_CSS_LIKE_QUERY);
        if (!base.isEmpty()) {
            String baseHref = base.first().attr("href");
            if (baseHref.endsWith("/")) {
                // Avoid a doubled slash between base and path
                href = baseHref + href.substring(1);
            } else {
                href = baseHref + href;
            }
            LOGGER.debug("(BASE CASE) The concat href " + href);
        } else {
            try {
                URI contractUri = new URI(content.getURI());
                href = StringUtils.join(contractUri.getScheme(), "://", contractUri.getHost(), href);
                LOGGER.debug("(NORMAL CASE) The concat href " + href);
            } catch (URISyntaxException ex) {
                // href stays relative; the comparison below then reports
                // the page as canonicalised elsewhere.
                LOGGER.error("Error when creating uri object with url " + content.getURI(), ex);
            }
        }
    }
    if (href.contains("//")) {
        href = href.substring(href.indexOf("//") + 2);
    }
    String currentUrl = content.getURI();
    if (currentUrl.endsWith("/")) {
        currentUrl = currentUrl.substring(0, currentUrl.length() - 1);
    }
    if (currentUrl.contains("//")) {
        currentUrl = currentUrl.substring(currentUrl.indexOf("//") + 2);
    }
    if (currentUrl.equals(href)) {
        LOGGER.info("rel canonical present but points to itself " + content.getURI());
        return false;
    }
    return true;
}

From source file:org.asqatasun.rules.textbuilder.PathElementBuilderTest.java

/**
 * Test of buildTextFromElement method, of class PathElementBuilder.
 *
 * Builds the path of the fixture's first footer element and checks that
 * selecting with that path yields exactly that element.
 */
public void testBuildTextFromElement() throws IOException {
    LOGGER.info("buildTextFromElement");
    File fixture = new File("src/test/resources/pathBuilder/test1.html");
    String html = FileUtils.readFileToString(fixture);
    Document document = Jsoup.parse(html);

    Elements footers = document.select("footer");
    Element footer = footers.first();

    PathElementBuilder builder = new PathElementBuilder(true);
    String builtPath = builder.buildTextFromElement(footer);
    LOGGER.debug("result = " + builtPath);

    Elements matches = document.select(builtPath);
    assertEquals(1, matches.size());
    assertEquals(footer, matches.first());
}