List of usage examples for org.jsoup.select Elements attr
public String attr(String attributeKey)
From source file:it.polito.tellmefirst.web.rest.enhance.Enhancer.java
public String getImageFromMediaWiki(String uri, String label) { LOG.debug("[getImageFromMediaWiki] - BEGIN"); String result = ""; String imageFileName = ""; try {//from w ww .j a v a 2 s. c om String lang = (uri.startsWith("http://dbpedia")) ? "en" : "it"; String filePageURL = "https://" + lang + ".wikipedia.org/wiki/Special:Redirect/file/"; String commonsFilePageURL = "https://commons.wikimedia.org/wiki/Special:Redirect/file/"; String queryStart = "https://" + lang + ".wikipedia.org/w/api.php?action=query&prop=pageimages&titles="; String queryEnd = "&format=xml"; String query = queryStart + label.replace(" ", "+") + queryEnd; LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + query); String xml = restManager.getStringFromAPI(query); Document doc = Jsoup.parse(xml); Elements elementsFound = doc.getElementsByTag("page"); imageFileName = elementsFound.attr("pageimage"); if (imageFileName == "") { LOG.debug("No images at all from Wikipedia page " + uri + ". We'll search on Wikidata."); String findQidStart = "https://wikidata.org/w/api.php?action=wbgetentities&format=xml&sites=" + lang + "wiki&titles="; String findQidEnd = "&props=info&format=xml"; String findQid = findQidStart + label.replace(" ", "+") + findQidEnd; LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid); xml = restManager.getStringFromAPI(findQid); doc = Jsoup.parse(xml); elementsFound = doc.getElementsByTag("entity"); String Qid = elementsFound.attr("title"); //XXX weak API but is the state of art; waiting for a better one https://phabricator.wikimedia.org/T95026 findQidStart = "https://www.wikidata.org/w/api.php?action=query&prop=images&titles="; findQidEnd = "&format=xml"; findQid = findQidStart + Qid + findQidEnd; LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid); xml = restManager.getStringFromAPI(findQid); doc = Jsoup.parse(xml); elementsFound = doc.getElementsByTag("im"); imageFileName = elementsFound.attr("title").replace("File:", ""); if (imageFileName == "") { LOG.debug("[getImageFromMediaWiki] - END"); return DEFAULT_IMAGE; } else { LOG.debug("[getImageFromMediaWiki] - END"); return commonsFilePageURL + imageFileName; } } else { LOG.debug("[getImageFromMediaWiki] - END"); return filePageURL + imageFileName; } } catch (Exception e) { LOG.error("[getImageFromMediaWiki] - EXCEPTION: ", e); } return DEFAULT_IMAGE; }
From source file:com.gote.downloader.kgs.KGSDownloader.java
/** * Check if a game is public, if yes, then the URL of that game will be sent back. * // ww w . j a v a 2s .c o m * @param pCell Element which represents the first KGS archives column * @return link of the SGF or null */ public String isPublicGame(Element pCell) { Elements a = pCell.getElementsByTag("a"); if (a != null && a.size() > 0) { // Check if it is a visible game if (a.html().equals(KGSUtils.KGS_TAG_FR_YES)) { return a.attr("href"); } } return null; }
From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java
/** * Parse the description out of the meta tag if one exists. Otherwise, return * null/*from w w w .ja v a 2s . c o m*/ * * @param doc The Document to parse * @return The description if it exists in the HTML, otherwise null. */ private String getDescriptionFromDocument(Document doc) { Elements metaDescriptionElements = doc.select("meta[name=description]"); return metaDescriptionElements.size() > 0 ? metaDescriptionElements.attr("content") : ""; }
From source file:net.devietti.ArchConfMapServlet.java
/** Fetch info for a list of conferences from WikiCFP */ private List<Conf> getConfInfo(List<String> confs) throws IOException { String query = StringUtils.join(confs, "+"); List<Conf> results = new LinkedList<Conf>(); /*// w w w. j av a 2 s.c o m * NB: year=f returns hits for this year and future years. This is exactly what we want, since * we automatically discard conferences that have already happened. */ Document doc = getURL("http://www.wikicfp.com/cfp/servlet/tool.search?year=f&q=" + query); Elements rows = doc.select("div[class=contsec] table table tr"); for (Iterator<Element> iter = rows.iterator(); iter.hasNext();) { final Element firstRow = iter.next(); final Elements confName = firstRow.select("td a"); if (confName.isEmpty()) continue; final Conf conf = new Conf(); // make sure we match one of the conferences we're interested in String cn = confName.first().text().split(" ")[0]; int found = Arrays.binarySearch(CONFERENCE_NAMES, cn); if (found < 0) continue; // not found final String confFullName = firstRow.select("td").get(1).text(); // don't match other ICS conferences, eg Information, Communication, Society if (CONFERENCE_NAMES[found].equals("ICS")) { if (!confFullName.toLowerCase().contains("supercomputing")) { continue; } } // don't match other CC conferences, eg Creative Construction if (CONFERENCE_NAMES[found].equals("CC")) { if (!confFullName.toLowerCase().contains("compiler")) { continue; } } conf.name = confName.first().text(); /* * we found a hit! The conference information is split across two <tr> table elements. * Conference name and link to cfp are in the first <tr>, and dates, location and deadline * in the second. */ final Element secondRow = iter.next(); String dates = secondRow.select("td").first().text(); String startDate = dates.substring(0, dates.indexOf('-')).trim(); conf.start = cfpDateFormat.parseDateTime(startDate); conf.end = cfpDateFormat.parseDateTime(dates.substring(dates.indexOf('-') + 1).trim()); conf.dates = cfpDateFormat.print(conf.start) + " - " + cfpDateFormat.print(conf.end); if (conf.start.year().equals(conf.end.year()) && conf.start.monthOfYear().equals(conf.end.monthOfYear())) { conf.dates = monthFormat.print(conf.start) + " " + dayFormat.print(conf.start) + "-" + dayFormat.print(conf.end) + " " + yearFormat.print(conf.start); } String deadline = secondRow.select("td").get(2).text().trim(); if (deadline.contains("(")) { // abstract deadline may be in parentheses deadline = deadline.substring(0, deadline.indexOf('(')).trim(); } conf.deadline = cfpDateFormat.parseDateTime(deadline); conf.url = "http://www.wikicfp.com" + confName.attr("href"); /* * extract the WikiCFP eventid from the link, so that, later on, the client can pull the * cfp page and get the direct conference site link. */ com.shopobot.util.URL url = new com.shopobot.util.URL(conf.url); String[] eid = url.getParameters("eventid"); if (0 == eid.length) continue; try { conf.eventid = Integer.valueOf(eid[0]); } catch (NumberFormatException e) { error("invalid event id " + eid); continue; } conf.location = secondRow.select("td").get(1).text(); results.add(conf); } return results; }
From source file:cn.scujcc.bug.bitcoinplatformandroid.fragment.QuotationInformationFragment.java
public void getImageAndContent(String url, News news) throws Exception { Document doc = Jsoup.connect(url).get(); Elements image = doc.select(".entry-content img"); news.setImage(image.attr("src")); Elements content = doc.select(".entry-content"); news.setContent(content.html());//from w w w . j av a 2s . co m }
From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java
/** * Construct the view for the activity//from w w w.j a va 2s . c om */ private void constructView() { AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() { @Override protected void onPreExecute() { loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading), getString(R.string.pleaseWait), true); } @Override protected Void doInBackground(Void... params) { link = getArguments().getString("link"); pageNumber = getArguments().getString("page"); if (pageNumber == null) pageNumber = "1"; Document doc = VBForumFactory.getInstance().get(getActivity(), link == null ? WebUrls.newPostUrl : link); if (doc != null) { // if doc came back, and link was null, we need to update // the link reference to reflect the new post URL if (link == null) { // <link rel="canonical" // href="http://www.rx8club.com/search.php?searchid=10961740" /> Elements ele = doc.select("link[rel^=canonical]"); if (ele != null) { link = ele.attr("href"); } } // The forum id data is only required if we are within a category // and not if we are in a New Posts page. This data is used when // we create new threads. publishProgress(getString(R.string.asyncDialogGrabThreads)); try { if (!isNewTopicActivity) { forumId = link.substring(link.lastIndexOf("-") + 1); // Make sure forumid doesn't end with a "/" forumId = Utils.parseInts(forumId); getCategoryContents(doc, link.substring(link.lastIndexOf('-') + 1, link.lastIndexOf('/')), link.contains("sale-wanted")); } else { getCategoryContents(doc, null, false); } publishProgress(getString(R.string.asyncDialogApplyFilters)); threadlist = CategoryFilterizer.applyFilter(threadlist); } catch (Exception e) { Toast.makeText(getActivity(), R.string.timeout, Toast.LENGTH_SHORT).show(); } getView().findViewById(R.id.mainlisttitle).setVisibility(View.GONE); publishProgress(getString(R.string.asyncDialogPopulating)); updateList(); } return null; } @Override protected void onProgressUpdate(String... progress) { if (loadingDialog != null) loadingDialog.setMessage(progress[0]); } @Override protected void onPostExecute(Void result) { try { loadingDialog.dismiss(); loadingDialog = null; } catch (Exception e) { Log.w(TAG, e.getMessage()); } } }; updaterTask.execute(); }
From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @param isMarket True if the link is from a marketplace category *///from w w w. j a v a 2s . c o m public void getCategoryContents(Document doc, String id, boolean isMarket) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; } catch (Exception e) { } // Make sure id contains only numbers if (!isNewTopicActivity) id = Utils.parseInts(id); // Grab each thread Elements threadListing = doc.select("table[id=threadslist] > tbody > tr"); for (Element thread : threadListing) { try { boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false, isPoll = false; String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "", lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = ""; Elements announcementContainer = thread.select("td[colspan=5]"); Elements threadTitleContainer = thread.select("a[id^=thread_title]"); // We could have two different types of threads. Announcement threads are // completely different than the other types of threads (sticky, locked, etc) // so we need to play some games here if (announcementContainer != null && !announcementContainer.isEmpty()) { Log.d(TAG, "Announcement Thread Found"); Elements annThread = announcementContainer.select("div > a"); Elements annUser = announcementContainer.select("div > span[class=smallfont]"); formattedTitle = "Announcement: " + annThread.first().text(); threadUser = annUser.last().text(); threadLink = annThread.attr("href"); isAnnounce = true; } else if (threadTitleContainer != null && !threadTitleContainer.isEmpty()) { Element threadLinkEl = thread.select("a[id^=thread_title]").first(); Element repliesText = thread.select("td[title^=Replies]").first(); Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first(); Element threadicon = thread.select("img[id^=thread_statusicon_]").first(); Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first(); Element threadDateFull = thread.select("td[title^=Replies:] > div").first(); try { isSticky = threadDiv.text().contains("Sticky:"); } catch (Exception e) { } try { isPoll = threadDiv.text().contains("Poll:"); } catch (Exception e) { } try { String icSt = threadicon.attr("src"); isLocked = (icSt.contains("lock") && icSt.endsWith(".gif")); } catch (Exception e) { } String preString = ""; try { preString = threadDiv.select("span > b").text(); } catch (Exception e) { } try { hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty(); } catch (Exception e) { } // Find the last page if it exists try { lastPage = threadDiv.select("span").last().select("a").last().attr("href"); } catch (Exception e) { } threadDate = threadDateFull.text(); int findAMPM = threadDate.indexOf("M") + 1; threadDate = threadDate.substring(0, findAMPM); String totalPostsInThreadTitle = threadicon.attr("alt"); if (totalPostsInThreadTitle != null && totalPostsInThreadTitle.length() > 0) totalPosts = totalPostsInThreadTitle.split(" ")[2]; // Remove page from the link String realLink = Utils.removePageFromLink(link); if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) { String txt = repliesText.getElementsByClass("alt2").attr("title"); String splitter[] = txt.split(" ", 4); postCount = splitter[1].substring(0, splitter[1].length() - 1); views = splitter[3]; try { if (this.isNewTopicActivity) forum = thread.select("td[class=alt1]").last().text(); } catch (Exception e) { } formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "", preString.length() == 0 ? "" : preString + " ", threadLinkEl.text()); } threadUser = threaduser.text(); lastUser = repliesText.select("a[href*=members]").text(); threadLink = threadLinkEl.attr("href"); } // Add our thread to our list as long as the thread // contains a title if (!formattedTitle.equals("")) { ThreadModel tv = new ThreadModel(); tv.setTitle(formattedTitle); tv.setStartUser(threadUser); tv.setLastUser(lastUser); tv.setLink(threadLink); tv.setLastLink(lastPage); tv.setPostCount(postCount); tv.setMyPosts(totalPosts); tv.setViewCount(views); tv.setLocked(isLocked); tv.setSticky(isSticky); tv.setAnnouncement(isAnnounce); tv.setPoll(isPoll); tv.setHasAttachment(hasAttachment); tv.setForum(forum); tv.setLastPostTime(threadDate); threadlist.add(tv); } else if (thread.text() .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) { Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size())); if (threadlist.size() > 0) { ThreadModel ltv = threadlist.get(threadlist.size() - 1); Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle())); } if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext())) threadlist.add(new ThreadModel(true)); else { Log.d(TAG, "User Chose To Hide Old Threads"); break; } } } catch (Exception e) { Log.e(TAG, "Error Parsing That Thread...", e); Log.d(TAG, "Thread may have moved"); } } }
From source file:com.normalexception.app.rx8club.fragment.HomeFragment.java
/** * User profile will be read as an async task after the main * activity has started. This doesn't always run, only when * the cache is either non-existant, or expired * @param doc The current page//from w ww . ja v a2s .c o m */ private void constructUserProfile(final Document doc) { profileTask = new AsyncTask<Void, String, Void>() { @Override protected void onPreExecute() { profileDialog = ProgressDialog.show(getActivity(), getString(R.string.loading), "Validating Profile", true); } @Override protected Void doInBackground(Void... params) { if (LoginFactory.getInstance().isLoggedIn()) { Document localDoc = doc; if (localDoc == null) localDoc = VBForumFactory.getInstance().get(getActivity(), VBForumFactory.getRootAddress()); if (localDoc != null) { Elements userElement = localDoc.select("a[href^=http://www.rx8club.com/members/" + UserProfile.getInstance().getHtmlUsername() + "]"); String un = userElement.attr("href"); UserProfile.getInstance().setUserProfileLink(un); try { // Try and scrap the uid from the href UserProfile.getInstance() .setUserId(un.substring(un.lastIndexOf("-") + 1, un.lastIndexOf("/"))); } catch (Exception e) { Log.e(TAG, "Error Parsing User ID", e); } } } return null; } @Override protected void onPostExecute(Void result) { try { profileDialog.dismiss(); profileDialog = null; } catch (Exception e) { Log.w(TAG, e.getMessage()); } upcache.cacheContents(UserProfile.getInstance()); } }; profileTask.execute(); }
From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageInboxFragment.java
/** * Construct view by grabbing all private messages. This is only done * if the view is called for the first time. If there was a savedinstance * of the view then this is not called//from w ww .j av a 2s .co m */ private void constructView() { this.showOutbound = getArguments().getBoolean(showOutboundExtra, false); AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() { @Override protected void onPreExecute() { loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading), getString(R.string.pleaseWait), true); } @Override protected Void doInBackground(Void... params) { Document doc = VBForumFactory.getInstance().get(getActivity(), showOutbound ? WebUrls.pmSentUrl : WebUrls.pmInboxUrl); if (doc != null) { token = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); String current_month = getMonthForInt(0); Elements collapse = doc .select(showOutbound ? "tbody[id^=collapseobj_pmf-1]" : "tbody[id^=collapseobj_pmf0]"); publishProgress(getString(R.string.asyncDialogGrabPMs)); for (Element coll : collapse) { Elements trs = coll.select("tr"); for (Element tr : trs) { Elements alt1s = tr.getElementsByClass("alt1Active"); for (Element alt1 : alt1s) { Elements divs = alt1.select("div"); // First grab our link Elements linkElement = divs.get(0).select("a[rel=nofollow]"); String pmLink = linkElement.attr("href"); // There should be two divs here with text in it // the first is 'MM-DD-YYYY Subject' String dateSubject = divs.get(0).text(); String[] dateSubjectSplit = dateSubject.split(" ", 2); // The second is HH:MM AMPM User String timeTimeUser = divs.get(1).text(); String[] timeTimeUserSplit = timeTimeUser.split(" ", 3); // Create new pm PMModel pm = new PMModel(); pm.setDate(dateSubjectSplit[0]); // Check the month before we go further String this_month = getMonthForInt(Integer.parseInt(pm.getDate().split("-")[0])); if (!current_month.equals(this_month)) { current_month = this_month; PMModel pm_m = new PMModel(); pm_m.setTitle(String.format("%s - %s", this_month, showOutbound ? getResources().getString(R.string.inboxSent) : getResources().getString(R.string.inboxInbox))); pmlist.add(pm_m); } pm.setTime(timeTimeUserSplit[0] + timeTimeUserSplit[1]); pm.setTitle(dateSubjectSplit[1]); pm.setUser(timeTimeUserSplit[2]); pm.setLink(pmLink); pm.setToken(token); Log.v(TAG, "Adding PM From: " + pm.getUser()); pmlist.add(pm); } } } updateList(); } return null; } @Override protected void onProgressUpdate(String... progress) { if (loadingDialog != null) loadingDialog.setMessage(progress[0]); } @Override protected void onPostExecute(Void result) { loadingDialog.dismiss(); } }; updaterTask.execute(); }