Example usage for org.jsoup.select Elements attr

List of usage examples for org.jsoup.select Elements attr

Introduction

This page collects example usages of the org.jsoup.select.Elements.attr method.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute value from the first matched element that has the attribute.

Usage

From source file:it.polito.tellmefirst.web.rest.enhance.Enhancer.java

/**
 * Looks up an image URL for a resource, first through the Wikipedia
 * "pageimages" API and, when that yields nothing, through Wikidata.
 *
 * <p>The language edition is "en" when {@code uri} starts with
 * "http://dbpedia", otherwise "it". Any exception raised during the lookup is
 * logged and the default image is returned.
 *
 * @param uri   URI of the resource; only used to pick the language and for logging
 * @param label page title to look up; spaces are replaced with '+'
 * @return a "Special:Redirect/file" URL for the image found, or
 *         {@code DEFAULT_IMAGE} when no image is found or an error occurs
 */
public String getImageFromMediaWiki(String uri, String label) {
    LOG.debug("[getImageFromMediaWiki] - BEGIN");
    try {
        String lang = (uri.startsWith("http://dbpedia")) ? "en" : "it";

        String filePageURL = "https://" + lang + ".wikipedia.org/wiki/Special:Redirect/file/";
        String commonsFilePageURL = "https://commons.wikimedia.org/wiki/Special:Redirect/file/";

        // NOTE(review): label is only space-escaped, not URL-encoded - confirm that
        // callers never pass titles containing '&', '#' or similar characters.
        String queryStart = "https://" + lang + ".wikipedia.org/w/api.php?action=query&prop=pageimages&titles=";
        String queryEnd = "&format=xml";
        String query = queryStart + label.replace(" ", "+") + queryEnd;

        LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + query);
        String xml = restManager.getStringFromAPI(query);
        Document doc = Jsoup.parse(xml);
        Elements elementsFound = doc.getElementsByTag("page");
        String imageFileName = elementsFound.attr("pageimage");

        // Compare by content: '==' on strings only tests reference identity.
        if (!imageFileName.isEmpty()) {
            LOG.debug("[getImageFromMediaWiki] - END");
            return filePageURL + imageFileName;
        }

        LOG.debug("No images at all from Wikipedia page " + uri + ". We'll search on Wikidata.");

        // Step 1: resolve the Wikidata entity title for the page.
        String findQidStart = "https://wikidata.org/w/api.php?action=wbgetentities&format=xml&sites=" + lang
                + "wiki&titles=";
        String findQidEnd = "&props=info&format=xml";
        String findQid = findQidStart + label.replace(" ", "+") + findQidEnd;

        LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid);
        xml = restManager.getStringFromAPI(findQid);
        doc = Jsoup.parse(xml);
        elementsFound = doc.getElementsByTag("entity");
        String qid = elementsFound.attr("title");

        // Step 2: list the images attached to that entity.
        //XXX weak API but is the state of art; waiting for a better one https://phabricator.wikimedia.org/T95026
        findQidStart = "https://www.wikidata.org/w/api.php?action=query&prop=images&titles=";
        findQidEnd = "&format=xml";
        findQid = findQidStart + qid + findQidEnd;

        LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid);
        xml = restManager.getStringFromAPI(findQid);
        doc = Jsoup.parse(xml);
        elementsFound = doc.getElementsByTag("im");
        imageFileName = elementsFound.attr("title").replace("File:", "");

        LOG.debug("[getImageFromMediaWiki] - END");
        if (imageFileName.isEmpty()) {
            return DEFAULT_IMAGE;
        }
        return commonsFilePageURL + imageFileName;
    } catch (Exception e) {
        LOG.error("[getImageFromMediaWiki] - EXCEPTION: ", e);
    }
    return DEFAULT_IMAGE;
}

From source file:com.gote.downloader.kgs.KGSDownloader.java

/**
 * Returns the link of a game when the given cell marks it as public.
 *
 * @param pCell Element which represents the first KGS archives column
 * @return link of the SGF or null
 */
public String isPublicGame(Element pCell) {
    Elements anchors = pCell.getElementsByTag("a");
    boolean hasAnchor = anchors != null && anchors.size() > 0;

    // The game is visible only when the anchor markup equals the "yes" tag
    if (!hasAnchor || !anchors.html().equals(KGSUtils.KGS_TAG_FR_YES)) {
        return null;
    }
    return anchors.attr("href");
}

From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java

/**
 * Parse the description out of the meta tag if one exists. Otherwise, return
 * null/*from  w  w w  .ja v a  2s . c o m*/
 *
 * @param doc The Document to parse
 * @return The description if it exists in the HTML, otherwise null.
 */
private String getDescriptionFromDocument(Document doc) {
    Elements metaDescriptionElements = doc.select("meta[name=description]");
    return metaDescriptionElements.size() > 0 ? metaDescriptionElements.attr("content") : "";
}

From source file:net.devietti.ArchConfMapServlet.java

/**
 * Fetch info for a list of conferences from WikiCFP.
 *
 * <p>Each hit in the result table spans two consecutive rows: the first holds
 * the conference name and CFP link, the second the dates, location and
 * deadline. Rows that do not match a known conference are skipped.
 *
 * @param confs conference names, joined with '+' to build the search query
 * @return the conferences that could be parsed, in page order
 * @throws IOException declared for callers; presumably raised by the page
 *         fetch - TODO confirm against getURL
 */
private List<Conf> getConfInfo(List<String> confs) throws IOException {
    String query = StringUtils.join(confs, "+");
    List<Conf> results = new LinkedList<Conf>();

    /*
     * NB: year=f returns hits for this year and future years. This is exactly what we want, since
     * we automatically discard conferences that have already happened.
     */
    Document doc = getURL("http://www.wikicfp.com/cfp/servlet/tool.search?year=f&q=" + query);

    Elements rows = doc.select("div[class=contsec] table table tr");
    for (Iterator<Element> iter = rows.iterator(); iter.hasNext();) {
        final Element firstRow = iter.next();
        final Elements confName = firstRow.select("td a");
        if (confName.isEmpty()) {
            continue;
        }

        final Conf conf = new Conf();

        // make sure we match one of the conferences we're interested in
        String cn = confName.first().text().split(" ")[0];
        int found = Arrays.binarySearch(CONFERENCE_NAMES, cn);
        if (found < 0) {
            continue; // not found
        }

        final String confFullName = firstRow.select("td").get(1).text();
        // don't match other ICS conferences, eg Information, Communication, Society
        if (CONFERENCE_NAMES[found].equals("ICS")) {
            if (!confFullName.toLowerCase().contains("supercomputing")) {
                continue;
            }
        }
        // don't match other CC conferences, eg Creative Construction
        if (CONFERENCE_NAMES[found].equals("CC")) {
            if (!confFullName.toLowerCase().contains("compiler")) {
                continue;
            }
        }

        conf.name = confName.first().text();

        /*
         * we found a hit! The conference information is split across two <tr> table elements.
         * Conference name and link to cfp are in the first <tr>, and dates, location and deadline
         * in the second.
         */

        // A matching first row at the very end of the table has no companion row;
        // stop instead of letting iter.next() throw NoSuchElementException.
        if (!iter.hasNext()) {
            break;
        }
        final Element secondRow = iter.next();
        String dates = secondRow.select("td").first().text();
        String startDate = dates.substring(0, dates.indexOf('-')).trim();
        conf.start = cfpDateFormat.parseDateTime(startDate);
        conf.end = cfpDateFormat.parseDateTime(dates.substring(dates.indexOf('-') + 1).trim());

        conf.dates = cfpDateFormat.print(conf.start) + " - " + cfpDateFormat.print(conf.end);
        // Use a compact "Month d-d yyyy" form when start and end share month and year
        if (conf.start.year().equals(conf.end.year())
                && conf.start.monthOfYear().equals(conf.end.monthOfYear())) {
            conf.dates = monthFormat.print(conf.start) + " " + dayFormat.print(conf.start) + "-"
                    + dayFormat.print(conf.end) + " " + yearFormat.print(conf.start);
        }

        String deadline = secondRow.select("td").get(2).text().trim();
        if (deadline.contains("(")) { // abstract deadline may be in parentheses
            deadline = deadline.substring(0, deadline.indexOf('(')).trim();
        }
        conf.deadline = cfpDateFormat.parseDateTime(deadline);

        conf.url = "http://www.wikicfp.com" + confName.attr("href");
        /*
         * extract the WikiCFP eventid from the link, so that, later on, the client can pull the
         * cfp page and get the direct conference site link.
         */

        com.shopobot.util.URL url = new com.shopobot.util.URL(conf.url);
        String[] eid = url.getParameters("eventid");
        if (0 == eid.length) {
            continue;
        }
        try {
            conf.eventid = Integer.valueOf(eid[0]);
        } catch (NumberFormatException e) {
            // Report the offending value, not the array reference
            error("invalid event id " + eid[0]);
            continue;
        }

        conf.location = secondRow.select("td").get(1).text();

        results.add(conf);
    }
    return results;
}

From source file:cn.scujcc.bug.bitcoinplatformandroid.fragment.QuotationInformationFragment.java

/**
 * Fetches the page at the given URL and copies its entry image and entry
 * content into the given news item.
 *
 * @param url  address of the page to fetch
 * @param news item that receives the image "src" and the content HTML
 * @throws Exception propagated from fetching or parsing the page
 */
public void getImageAndContent(String url, News news) throws Exception {
    Document page = Jsoup.connect(url).get();

    // "src" of the images found inside the entry content
    String imageSrc = page.select(".entry-content img").attr("src");
    news.setImage(imageSrc);

    // Inner HTML of the entry content itself
    String bodyHtml = page.select(".entry-content").html();
    news.setContent(bodyHtml);
}

From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java

/**
 * Construct the view for the activity.
 *
 * <p>Starts an AsyncTask that shows a loading dialog, fetches the forum page
 * named by the "link" argument (or the new-posts page when it is absent),
 * parses its threads via {@code getCategoryContents}, applies the category
 * filter and refreshes the list. The dialog is dismissed when the task ends.
 */
private void constructView() {

    AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() {
        @Override
        protected void onPreExecute() {
            loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading),
                    getString(R.string.pleaseWait), true);
        }

        @Override
        protected Void doInBackground(Void... params) {
            link = getArguments().getString("link");
            pageNumber = getArguments().getString("page");

            if (pageNumber == null) {
                pageNumber = "1";
            }

            Document doc = VBForumFactory.getInstance().get(getActivity(),
                    link == null ? WebUrls.newPostUrl : link);

            if (doc != null) {
                // if doc came back, and link was null, we need to update
                // the link reference to reflect the new post URL
                if (link == null) {
                    // <link rel="canonical" 
                    // href="http://www.rx8club.com/search.php?searchid=10961740" />
                    Elements ele = doc.select("link[rel^=canonical]");
                    if (ele != null) {
                        link = ele.attr("href");
                    }
                }

                // The forum id data is only required if we are within a category
                // and not if we are in a New Posts page.  This data is used when
                // we create new threads.
                publishProgress(getString(R.string.asyncDialogGrabThreads));
                try {
                    if (!isNewTopicActivity) {
                        forumId = link.substring(link.lastIndexOf("-") + 1);

                        // Make sure forumid doesn't end with a "/"
                        forumId = Utils.parseInts(forumId);

                        getCategoryContents(doc,
                                link.substring(link.lastIndexOf('-') + 1, link.lastIndexOf('/')),
                                link.contains("sale-wanted"));
                    } else {
                        getCategoryContents(doc, null, false);
                    }

                    publishProgress(getString(R.string.asyncDialogApplyFilters));
                    threadlist = CategoryFilterizer.applyFilter(threadlist);
                } catch (Exception e) {
                    Log.e(TAG, "Error loading category contents", e);

                    // A Toast cannot be created on this background thread (it has
                    // no Looper), so hand it over to the UI thread.
                    if (getActivity() != null) {
                        getActivity().runOnUiThread(new Runnable() {
                            @Override
                            public void run() {
                                if (getActivity() != null) {
                                    Toast.makeText(getActivity(), R.string.timeout, Toast.LENGTH_SHORT).show();
                                }
                            }
                        });
                    }
                }

                // NOTE(review): this touches a View from the background thread;
                // confirm whether it should move to onPostExecute.
                getView().findViewById(R.id.mainlisttitle).setVisibility(View.GONE);

                publishProgress(getString(R.string.asyncDialogPopulating));
                updateList();
            }
            return null;
        }

        @Override
        protected void onProgressUpdate(String... progress) {
            if (loadingDialog != null) {
                loadingDialog.setMessage(progress[0]);
            }
        }

        @Override
        protected void onPostExecute(Void result) {
            try {
                if (loadingDialog != null) {
                    loadingDialog.dismiss();
                }
            } catch (Exception e) {
                // Pass the throwable itself: e.getMessage() may be null, which
                // Log.w(String, String) does not accept.
                Log.w(TAG, "Unable to dismiss loading dialog", e);
            } finally {
                loadingDialog = null;
            }
        }
    };
    updaterTask.execute();
}

From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java

/**
 * Grab contents from the forum that the user clicked on.
 *
 * <p>Reads the pagination into {@code thisPage}/{@code finalPage}, then walks
 * every row of the "threadslist" table and appends one {@code ThreadModel}
 * per parsed thread to {@code threadlist}. A row that fails to parse is
 * logged and skipped.
 *
 * @param doc      The document parsed from the link
 * @param id      The id number of the link
 * @param isMarket    True if the link is from a marketplace category
 */
public void getCategoryContents(Document doc, String id, boolean isMarket) {

    // Update pagination
    try {
        Elements pageNumbers = doc.select("div[class=pagenav]");
        Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]");
        // Second and fourth space-separated words of the control text;
        // presumably of the form "Page X of Y" - TODO confirm
        thisPage = pageLinks.text().split(" ")[1];
        finalPage = pageLinks.text().split(" ")[3];
    } catch (Exception e) {
        // Best effort: when the pagination cannot be read, thisPage and
        // finalPage keep their previous values
    }

    // Make sure id contains only numbers
    if (!isNewTopicActivity)
        id = Utils.parseInts(id);

    // Grab each thread
    Elements threadListing = doc.select("table[id=threadslist] > tbody > tr");

    for (Element thread : threadListing) {
        try {
            // Defaults used when a field cannot be scraped from the row
            boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false,
                    isPoll = false;
            String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "",
                    lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = "";

            Elements announcementContainer = thread.select("td[colspan=5]");
            Elements threadTitleContainer = thread.select("a[id^=thread_title]");

            // We could have two different types of threads.  Announcement threads are 
            // completely different than the other types of threads (sticky, locked, etc)
            // so we need to play some games here
            if (announcementContainer != null && !announcementContainer.isEmpty()) {
                Log.d(TAG, "Announcement Thread Found");

                Elements annThread = announcementContainer.select("div > a");
                Elements annUser = announcementContainer.select("div > span[class=smallfont]");
                formattedTitle = "Announcement: " + annThread.first().text();
                threadUser = annUser.last().text();
                threadLink = annThread.attr("href");
                isAnnounce = true;
            } else if (threadTitleContainer != null && !threadTitleContainer.isEmpty()) {
                // Regular thread row: collect the elements the fields are read from
                Element threadLinkEl = thread.select("a[id^=thread_title]").first();
                Element repliesText = thread.select("td[title^=Replies]").first();
                Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first();
                Element threadicon = thread.select("img[id^=thread_statusicon_]").first();
                Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first();
                Element threadDateFull = thread.select("td[title^=Replies:] > div").first();

                // Each optional flag below is read in its own try block so that a
                // failure leaves that flag at its default and parsing continues
                try {
                    isSticky = threadDiv.text().contains("Sticky:");
                } catch (Exception e) {
                }

                try {
                    isPoll = threadDiv.text().contains("Poll:");
                } catch (Exception e) {
                }

                try {
                    // Locked when the status icon source contains "lock" and is a .gif
                    String icSt = threadicon.attr("src");
                    isLocked = (icSt.contains("lock") && icSt.endsWith(".gif"));
                } catch (Exception e) {
                }

                // Bold text inside the title div, prepended to the title below
                String preString = "";
                try {
                    preString = threadDiv.select("span > b").text();
                } catch (Exception e) {
                }

                try {
                    hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty();
                } catch (Exception e) {
                }

                // Find the last page if it exists
                try {
                    lastPage = threadDiv.select("span").last().select("a").last().attr("href");
                } catch (Exception e) {
                }

                // Keep the date text up to and including the first 'M'
                // (presumably the end of "AM"/"PM" - TODO confirm)
                threadDate = threadDateFull.text();
                int findAMPM = threadDate.indexOf("M") + 1;
                threadDate = threadDate.substring(0, findAMPM);

                String totalPostsInThreadTitle = threadicon.attr("alt");

                // Third space-separated word of the status icon's alt text
                if (totalPostsInThreadTitle != null && totalPostsInThreadTitle.length() > 0)
                    totalPosts = totalPostsInThreadTitle.split(" ")[2];

                // Remove page from the link
                String realLink = Utils.removePageFromLink(link);

                // Only rows whose link belongs to this category get a title (and so
                // get added below), unless we are in a new-topic or market listing
                if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) {

                    // Title text presumably reads "Replies: N, Views: M" - TODO confirm.
                    // Word 1 minus its last character is the post count, word 3 the views.
                    String txt = repliesText.getElementsByClass("alt2").attr("title");
                    String splitter[] = txt.split(" ", 4);

                    postCount = splitter[1].substring(0, splitter[1].length() - 1);
                    views = splitter[3];

                    try {
                        if (this.isNewTopicActivity)
                            forum = thread.select("td[class=alt1]").last().text();
                    } catch (Exception e) {
                    }

                    // "Sticky: " takes precedence over "Poll: " as the title prefix
                    formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "",
                            preString.length() == 0 ? "" : preString + " ", threadLinkEl.text());
                }

                threadUser = threaduser.text();
                lastUser = repliesText.select("a[href*=members]").text();
                threadLink = threadLinkEl.attr("href");
            }

            // Add our thread to our list as long as the thread
            // contains a title
            if (!formattedTitle.equals("")) {
                ThreadModel tv = new ThreadModel();
                tv.setTitle(formattedTitle);
                tv.setStartUser(threadUser);
                tv.setLastUser(lastUser);
                tv.setLink(threadLink);
                tv.setLastLink(lastPage);
                tv.setPostCount(postCount);
                tv.setMyPosts(totalPosts);
                tv.setViewCount(views);
                tv.setLocked(isLocked);
                tv.setSticky(isSticky);
                tv.setAnnouncement(isAnnounce);
                tv.setPoll(isPoll);
                tv.setHasAttachment(hasAttachment);
                tv.setForum(forum);
                tv.setLastPostTime(threadDate);
                threadlist.add(tv);
            } else if (thread.text()
                    .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) {
                // Untitled row carrying the "no update" text: marks the end of the new threads
                Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size()));
                if (threadlist.size() > 0) {
                    ThreadModel ltv = threadlist.get(threadlist.size() - 1);
                    Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle()));
                }

                // Either add a ThreadModel(true) entry and keep going, or stop
                // parsing when the user chose to hide old posts
                if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext()))
                    threadlist.add(new ThreadModel(true));
                else {
                    Log.d(TAG, "User Chose To Hide Old Threads");
                    break;
                }
            }
        } catch (Exception e) {
            // Any failure while parsing a row skips that row only
            Log.e(TAG, "Error Parsing That Thread...", e);
            Log.d(TAG, "Thread may have moved");
        }
    }
}

From source file:com.normalexception.app.rx8club.fragment.HomeFragment.java

/**
 * User profile will be read as an async task after the main
 * activity has started.  This doesn't always run, only when 
 * the cache is either non-existent, or expired.
 *
 * <p>When the user is logged in, the profile link is taken from the anchor
 * whose href starts with the user's members URL, and the user id is cut out
 * of that link. The profile is cached once the task completes.
 *
 * @param doc   The current page; when null the forum root page is fetched instead
 */
private void constructUserProfile(final Document doc) {
    profileTask = new AsyncTask<Void, String, Void>() {
        @Override
        protected void onPreExecute() {
            profileDialog = ProgressDialog.show(getActivity(), getString(R.string.loading),
                    "Validating Profile", true);
        }

        @Override
        protected Void doInBackground(Void... params) {
            if (LoginFactory.getInstance().isLoggedIn()) {
                Document localDoc = doc;
                if (localDoc == null) {
                    localDoc = VBForumFactory.getInstance().get(getActivity(), VBForumFactory.getRootAddress());
                }
                if (localDoc != null) {
                    Elements userElement = localDoc.select("a[href^=http://www.rx8club.com/members/"
                            + UserProfile.getInstance().getHtmlUsername() + "]");
                    String un = userElement.attr("href");

                    UserProfile.getInstance().setUserProfileLink(un);

                    try {
                        // Try and scrape the uid from the href: the text between
                        // the last '-' and the last '/'
                        UserProfile.getInstance()
                                .setUserId(un.substring(un.lastIndexOf("-") + 1, un.lastIndexOf("/")));
                    } catch (Exception e) {
                        Log.e(TAG, "Error Parsing User ID", e);
                    }
                }
            }
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            try {
                if (profileDialog != null) {
                    profileDialog.dismiss();
                }
            } catch (Exception e) {
                // Pass the throwable itself: e.getMessage() may be null, which
                // Log.w(String, String) does not accept.
                Log.w(TAG, "Unable to dismiss profile dialog", e);
            } finally {
                profileDialog = null;
            }
            upcache.cacheContents(UserProfile.getInstance());
        }
    };
    profileTask.execute();
}

From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageInboxFragment.java

/**
 * Construct view by grabbing all private messages.  This is only done
 * if the view is called for the first time.  If there was a savedinstance
 * of the view then this is not called.
 *
 * <p>Fetches the inbox or sent page (depending on the "show outbound"
 * argument), turns every "alt1Active" cell into a {@code PMModel} and adds a
 * month header entry to {@code pmlist} whenever the month changes. Rows whose
 * layout or date cannot be parsed are logged and skipped.
 */
private void constructView() {
    this.showOutbound = getArguments().getBoolean(showOutboundExtra, false);

    AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() {
        @Override
        protected void onPreExecute() {

            loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading),
                    getString(R.string.pleaseWait), true);
        }

        @Override
        protected Void doInBackground(Void... params) {
            Document doc = VBForumFactory.getInstance().get(getActivity(),
                    showOutbound ? WebUrls.pmSentUrl : WebUrls.pmInboxUrl);

            if (doc != null) {
                token = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken");
                String currentMonth = getMonthForInt(0);
                Elements collapse = doc
                        .select(showOutbound ? "tbody[id^=collapseobj_pmf-1]" : "tbody[id^=collapseobj_pmf0]");

                publishProgress(getString(R.string.asyncDialogGrabPMs));
                for (Element coll : collapse) {
                    Elements trs = coll.select("tr");
                    for (Element tr : trs) {
                        Elements alt1s = tr.getElementsByClass("alt1Active");
                        for (Element alt1 : alt1s) {

                            Elements divs = alt1.select("div");
                            if (divs.size() < 2) {
                                // Both divs are read below; skip rows that lack them
                                Log.w(TAG, "Skipping PM row with unexpected layout");
                                continue;
                            }

                            // First grab our link
                            Elements linkElement = divs.get(0).select("a[rel=nofollow]");
                            String pmLink = linkElement.attr("href");

                            // There should be two divs here with text in it
                            // the first is 'MM-DD-YYYY Subject'
                            String dateSubject = divs.get(0).text();
                            String[] dateSubjectSplit = dateSubject.split(" ", 2);

                            // The second is HH:MM AMPM User
                            String timeTimeUser = divs.get(1).text();
                            String[] timeTimeUserSplit = timeTimeUser.split(" ", 3);

                            if (dateSubjectSplit.length < 2 || timeTimeUserSplit.length < 3) {
                                Log.w(TAG, "Skipping PM row with unexpected text: " + dateSubject + " / "
                                        + timeTimeUser);
                                continue;
                            }

                            // Create new pm
                            PMModel pm = new PMModel();
                            pm.setDate(dateSubjectSplit[0]);

                            // Check the month before we go further
                            String thisMonth;
                            try {
                                thisMonth = getMonthForInt(Integer.parseInt(pm.getDate().split("-")[0]));
                            } catch (NumberFormatException e) {
                                Log.w(TAG, "Skipping PM row with unparsable date: " + dateSubjectSplit[0], e);
                                continue;
                            }
                            if (!currentMonth.equals(thisMonth)) {
                                // Month changed: insert a header entry before the message
                                currentMonth = thisMonth;
                                PMModel monthHeader = new PMModel();
                                monthHeader.setTitle(String.format("%s - %s", thisMonth,
                                        showOutbound ? getResources().getString(R.string.inboxSent)
                                                : getResources().getString(R.string.inboxInbox)));
                                pmlist.add(monthHeader);
                            }

                            pm.setTime(timeTimeUserSplit[0] + timeTimeUserSplit[1]);
                            pm.setTitle(dateSubjectSplit[1]);
                            pm.setUser(timeTimeUserSplit[2]);
                            pm.setLink(pmLink);
                            pm.setToken(token);

                            Log.v(TAG, "Adding PM From: " + pm.getUser());
                            pmlist.add(pm);
                        }
                    }
                }
                updateList();
            }
            return null;
        }

        @Override
        protected void onProgressUpdate(String... progress) {
            if (loadingDialog != null) {
                loadingDialog.setMessage(progress[0]);
            }
        }

        @Override
        protected void onPostExecute(Void result) {
            // Guarded like onProgressUpdate: the dialog may be gone by now
            try {
                if (loadingDialog != null) {
                    loadingDialog.dismiss();
                }
            } catch (Exception e) {
                Log.w(TAG, "Unable to dismiss loading dialog", e);
            }
        }
    };
    updaterTask.execute();
}