List of usage examples for org.jsoup.nodes.Element#getElementsByClass
public Elements getElementsByClass(String className)
From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java
public FetchResult updateData(Context context, boolean force) { //Open database DBOpenHelper dbhelper = new DBOpenHelper(context); SQLiteDatabase db = dbhelper.getWritableDatabase(); // check for internet connectivity try {/*from w ww . j av a 2 s.com*/ if (!isOnline(context)) { Log.d(TAG, "We do not seem to be online. Skipping Update."); return FetchResult.NOTONLINE; } } catch (Exception e) { exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()"); } SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context); if (!force) { try { if (sp.getBoolean("loginFailed", false) == true) { Log.d(TAG, "Previous login failed. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update."); return FetchResult.LOGINFAILED; } if (sp.getBoolean("autoupdates", true) == false) { Log.d(TAG, "Automatic updates not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) { Log.d(TAG, "Background data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true) && sp.getBoolean("obeyBackgroundData", true)) { Log.d(TAG, "Auto sync not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (isWifi(context) && !sp.getBoolean("wifiUpdates", true)) { Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update"); DBLog.insertMessage(context, "i", TAG, "On wifi, and wifi auto updates not allowed. 
Skipping Update"); return FetchResult.NOTALLOWED; } else if (!isWifi(context)) { Log.d(TAG, "We are not on wifi."); if (!isRoaming(context) && !sp.getBoolean("2DData", true)) { Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } else if (isRoaming(context) && !sp.getBoolean("roamingData", false)) { Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } } } catch (Exception e) { exceptionReporter.reportException(Thread.currentThread(), e, "Exception while finding if to update."); } } else { Log.d(TAG, "Update Forced"); } try { String username = sp.getString("username", null); String password = sp.getString("password", null); if (username == null || password == null) { DBLog.insertMessage(context, "i", TAG, "Username or password not set."); return FetchResult.USERNAMEPASSWORDNOTSET; } // Find the URL of the page to send login data to. Log.d(TAG, "Finding Action. 
"); HttpGetter loginPageGet = new HttpGetter("https://secure.2degreesmobile.co.nz/web/ip/login"); String loginPageString = loginPageGet.execute(); if (loginPageString != null) { Document loginPage = Jsoup.parse(loginPageString, "https://secure.2degreesmobile.co.nz/web/ip/login"); Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first(); String loginAction = loginForm.attr("action"); // Send login form List<NameValuePair> loginValues = new ArrayList<NameValuePair>(); loginValues.add(new BasicNameValuePair("externalURLRedirect", "")); loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin")); loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M")); loginValues.add(new BasicNameValuePair("hdnlocale", "")); loginValues.add(new BasicNameValuePair("userid", username)); loginValues.add(new BasicNameValuePair("password", password)); Log.d(TAG, "Sending Login "); HttpPoster sendLoginPoster = new HttpPoster(loginAction, loginValues); // Parse result String loginResponse = sendLoginPoster.execute(); Document loginResponseParsed = Jsoup.parse(loginResponse); // Determine if this is a pre-pay or post-paid account. 
boolean postPaid; if (loginResponseParsed .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") == null) { Log.d(TAG, "Pre-pay account or no account."); postPaid = false; } else { Log.d(TAG, "Post-paid account."); postPaid = true; } String homepageUrl = "https://secure.2degreesmobile.co.nz/group/ip/home"; if (postPaid) { homepageUrl = "https://secure.2degreesmobile.co.nz/group/ip/postpaid"; } HttpGetter homepageGetter = new HttpGetter(homepageUrl); String homepageHTML = homepageGetter.execute(); Document homePage = Jsoup.parse(homepageHTML); Element accountSummary = homePage.getElementById("accountSummary"); if (accountSummary == null) { Log.d(TAG, "Login failed."); return FetchResult.LOGINFAILED; } db.delete("cache", "", null); /* This code fetched some extra details for postpaid users, but on reflection they aren't that useful. * Might reconsider this. * if (postPaid) { Element accountBalanceSummaryTable = accountSummary.getElementsByClass("tableBillSummary").first(); Elements rows = accountBalanceSummaryTable.getElementsByTag("tr"); int rowno = 0; for (Element row : rows) { if (rowno > 1) { break; } //Log.d(TAG, "Starting row"); //Log.d(TAG, row.html()); Double value; try { Element amount = row.getElementsByClass("tableBillamount").first(); String amountHTML = amount.html(); Log.d(TAG, amountHTML.substring(1)); value = Double.parseDouble(amountHTML.substring(1)); } catch (Exception e) { Log.d(TAG, "Failed to parse amount from row."); value = null; } String expiresDetails = ""; String expiresDate = null; String name = null; try { Element details = row.getElementsByClass("tableBilldetail").first(); name = details.ownText(); Element expires = details.getElementsByTag("em").first(); if (expires != null) { expiresDetails = expires.text(); } Log.d(TAG, expiresDetails); Pattern pattern; pattern = Pattern.compile("\\(payment is due (.*)\\)"); Matcher matcher = pattern.matcher(expiresDetails); if (matcher.find()) { /*Log.d(TAG, "matched 
expires"); Log.d(TAG, "group 0:" + matcher.group(0)); Log.d(TAG, "group 1:" + matcher.group(1)); Log.d(TAG, "group 2:" + matcher.group(2)); * String expiresDateString = matcher.group(1); Date expiresDateObj; if (expiresDateString != null) { if (expiresDateString.length() > 0) { try { expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString); expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj); } catch (java.text.ParseException e) { Log.d(TAG, "Could not parse date: " + expiresDateString); } } } } } catch (Exception e) { Log.d(TAG, "Failed to parse details from row."); } String expirev = null; ContentValues values = new ContentValues(); values.put("name", name); values.put("value", value); values.put("units", "$NZ"); values.put("expires_value", expirev ); values.put("expires_date", expiresDate); db.insert("cache", "value", values ); rowno++; } } */ Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first(); Elements rows = accountSummaryTable.getElementsByTag("tr"); for (Element row : rows) { // We are now looking at each of the rows in the data table. 
//Log.d(TAG, "Starting row"); //Log.d(TAG, row.html()); Double value; String units; try { Element amount = row.getElementsByClass("tableBillamount").first(); String amountHTML = amount.html(); //Log.d(TAG, amountHTML); String[] amountParts = amountHTML.split(" ", 2); //Log.d(TAG, amountParts[0]); //Log.d(TAG, amountParts[1]); if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need") || amountParts[0].equals("Unlimited Text*")) { value = Values.INCLUDED; } else { try { value = Double.parseDouble(amountParts[0]); } catch (NumberFormatException e) { exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value."); value = 0.0; } } units = amountParts[1]; } catch (NullPointerException e) { //Log.d(TAG, "Failed to parse amount from row."); value = null; units = null; } Element details = row.getElementsByClass("tableBilldetail").first(); String name = details.getElementsByTag("strong").first().text(); Element expires = details.getElementsByTag("em").first(); String expiresDetails = ""; if (expires != null) { expiresDetails = expires.text(); } Log.d(TAG, expiresDetails); Pattern pattern; if (postPaid == false) { pattern = Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)"); } else { pattern = Pattern.compile("\\(([\\d\\.]*) ?\\w*? 
?will expire on (.*)\\)"); } Matcher matcher = pattern.matcher(expiresDetails); Double expiresValue = null; String expiresDate = null; if (matcher.find()) { /*Log.d(TAG, "matched expires"); Log.d(TAG, "group 0:" + matcher.group(0)); Log.d(TAG, "group 1:" + matcher.group(1)); Log.d(TAG, "group 2:" + matcher.group(2)); */ try { expiresValue = Double.parseDouble(matcher.group(1)); } catch (NumberFormatException e) { expiresValue = null; } String expiresDateString = matcher.group(2); Date expiresDateObj; if (expiresDateString != null) { if (expiresDateString.length() > 0) { try { expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString); expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj); } catch (java.text.ParseException e) { Log.d(TAG, "Could not parse date: " + expiresDateString); } } } } ContentValues values = new ContentValues(); values.put("name", name); values.put("value", value); values.put("units", units); values.put("expires_value", expiresValue); values.put("expires_date", expiresDate); db.insert("cache", "value", values); } if (postPaid == false) { Log.d(TAG, "Getting Value packs..."); // Find value packs HttpGetter valuePacksPageGet = new HttpGetter( "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack"); String valuePacksPageString = valuePacksPageGet.execute(); //DBLog.insertMessage(context, "d", "", valuePacksPageString); if (valuePacksPageString != null) { Document valuePacksPage = Jsoup.parse(valuePacksPageString); Elements enabledPacks = valuePacksPage.getElementsByClass("yellow"); for (Element enabledPack : enabledPacks) { Element offerNameElemt = enabledPack .getElementsByAttributeValueStarting("name", "offername").first(); if (offerNameElemt != null) { String offerName = offerNameElemt.val(); DBLog.insertMessage(context, "d", "", "Got element: " + offerName); ValuePack[] packs = Values.valuePacks.get(offerName); if (packs == null) { DBLog.insertMessage(context, "d", "", "Offer name: " + offerName + " not 
matched."); } else { for (ValuePack pack : packs) { ContentValues values = new ContentValues(); values.put("plan_startamount", pack.value); values.put("plan_name", offerName); DBLog.insertMessage(context, "d", "", "Pack " + pack.type.id + " start value set to " + pack.value); db.update("cache", values, "name = '" + pack.type.id + "'", null); } } } } } } SharedPreferences.Editor prefedit = sp.edit(); Date now = new Date(); prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now)); prefedit.putBoolean("loginFailed", false); prefedit.putBoolean("networkError", false); prefedit.commit(); DBLog.insertMessage(context, "i", TAG, "Update Successful"); return FetchResult.SUCCESS; } } catch (ClientProtocolException e) { DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage()); return FetchResult.NETWORKERROR; } catch (IOException e) { DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage()); return FetchResult.NETWORKERROR; } finally { db.close(); } return null; }
From source file:ca.zadrox.dota2esportticker.service.UpdateMatchService.java
private void updateMatches(boolean doResults) { if (!checkForConnectivity()) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(UPDATE_NO_CONNECTIVITY)); return;//from w ww . j a va 2s . co m } LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(UPDATE_STARTED)); final String BASE_URL = "http://www.gosugamers.net/dota2/gosubet"; final String MATCH_LINK_URL_BASE = "http://www.gosugamers.net"; try { String rawHtml = new OkHttpClient().newCall(new Request.Builder().url(BASE_URL).build()).execute() .body().string(); rawHtml = rawHtml.substring(rawHtml.indexOf("<div id=\"col1\" class=\"rows\">"), rawHtml.indexOf("<div id=\"col2\" class=\"rows\">")); Document doc = Jsoup.parse(rawHtml); Elements tables = doc.getElementsByClass("matches"); ArrayList<ArrayList<String>> matchLinks = new ArrayList<ArrayList<String>>(tables.size()); int numSeries = 0; for (Element table : tables) { Elements links = table.getElementsByClass("match"); if (links.size() != 0) { ArrayList<String> innerMatchLink = new ArrayList<String>(links.size()); for (Element link : links) { String linkHref = link.attr("href"); innerMatchLink.add(MATCH_LINK_URL_BASE + linkHref); numSeries++; } matchLinks.add(innerMatchLink); } } // needed if there are massive reschedules to update content properly. 
Uri resultsUri = MatchContract.SeriesEntry.buildSeriesUriWithAfterTime(TimeUtils.getUTCTime()); Cursor c = getContentResolver().query(resultsUri, new String[] { MatchContract.SeriesEntry.COLUMN_GG_MATCH_PAGE }, null, null, null); while (c.moveToNext()) { if (!matchLinks.get(0).contains(c.getString(0))) { matchLinks.get(0).add(c.getString(0)); } } Iterator<ArrayList<String>> iterator = matchLinks.iterator(); int numResults = 0; ExecutorService executorService = Executors.newFixedThreadPool(10); ArrayList<Future<BundledMatchItem>> seriesItemFutures = new ArrayList<Future<BundledMatchItem>>( numSeries); LogUtils.LOGD(TAG, "Starting Retrieval, num elements gathered: " + numSeries); int i = 0; while (iterator.hasNext()) { ArrayList<String> matchList = iterator.next(); for (String matchUrl : matchList) { boolean hasResult = !iterator.hasNext(); if (!doResults && hasResult) { continue; } else if (hasResult) { numResults++; } seriesItemFutures.add(executorService.submit(new MatchGetter(matchUrl, hasResult))); i++; } } executorService.shutdown(); executorService.awaitTermination(20L, TimeUnit.SECONDS); LogUtils.LOGD(TAG, "Stopping Retrieval, elements submitted for fetching: " + i); ContentValues[] seriesEntries = new ContentValues[i]; ContentValues[] resultEntries = new ContentValues[numResults]; int seriesEntryWriteIndex = 0; int resultEntryWriteIndex = 0; for (Future<BundledMatchItem> seriesItemFuture : seriesItemFutures) { try { BundledMatchItem seriesItem = seriesItemFuture.get(); if (seriesItem != null) { seriesEntries[seriesEntryWriteIndex] = seriesItem.mMatch; seriesEntryWriteIndex++; if (seriesItem.hasResult) { resultEntries[resultEntryWriteIndex] = seriesItem.mResult; resultEntryWriteIndex++; } } } catch (ExecutionException e) { Log.e(TAG, "Should never get here"); } } this.getContentResolver().bulkInsert(MatchContract.SeriesEntry.CONTENT_URI, seriesEntries); if (doResults) this.getContentResolver().bulkInsert(MatchContract.ResultEntry.CONTENT_URI, resultEntries); 
PrefUtils.setLastUpdateTime(this, TimeUtils.getUTCTime()); } catch (IOException e) { Log.e(TAG, e.getMessage(), e); e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(UPDATE_COMPLETE)); PrefUtils.setLastResultsUpdateTime(this, TimeUtils.getUTCTime()); }
From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @param isMarket True if the link is from a marketplace category *///from www. j ava2 s . com public void getCategoryContents(Document doc, String id, boolean isMarket) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; } catch (Exception e) { } // Make sure id contains only numbers if (!isNewTopicActivity) id = Utils.parseInts(id); // Grab each thread Elements threadListing = doc.select("table[id=threadslist] > tbody > tr"); for (Element thread : threadListing) { try { boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false, isPoll = false; String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "", lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = ""; Elements announcementContainer = thread.select("td[colspan=5]"); Elements threadTitleContainer = thread.select("a[id^=thread_title]"); // We could have two different types of threads. 
Announcement threads are // completely different than the other types of threads (sticky, locked, etc) // so we need to play some games here if (announcementContainer != null && !announcementContainer.isEmpty()) { Log.d(TAG, "Announcement Thread Found"); Elements annThread = announcementContainer.select("div > a"); Elements annUser = announcementContainer.select("div > span[class=smallfont]"); formattedTitle = "Announcement: " + annThread.first().text(); threadUser = annUser.last().text(); threadLink = annThread.attr("href"); isAnnounce = true; } else if (threadTitleContainer != null && !threadTitleContainer.isEmpty()) { Element threadLinkEl = thread.select("a[id^=thread_title]").first(); Element repliesText = thread.select("td[title^=Replies]").first(); Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first(); Element threadicon = thread.select("img[id^=thread_statusicon_]").first(); Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first(); Element threadDateFull = thread.select("td[title^=Replies:] > div").first(); try { isSticky = threadDiv.text().contains("Sticky:"); } catch (Exception e) { } try { isPoll = threadDiv.text().contains("Poll:"); } catch (Exception e) { } try { String icSt = threadicon.attr("src"); isLocked = (icSt.contains("lock") && icSt.endsWith(".gif")); } catch (Exception e) { } String preString = ""; try { preString = threadDiv.select("span > b").text(); } catch (Exception e) { } try { hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty(); } catch (Exception e) { } // Find the last page if it exists try { lastPage = threadDiv.select("span").last().select("a").last().attr("href"); } catch (Exception e) { } threadDate = threadDateFull.text(); int findAMPM = threadDate.indexOf("M") + 1; threadDate = threadDate.substring(0, findAMPM); String totalPostsInThreadTitle = threadicon.attr("alt"); if (totalPostsInThreadTitle != null && totalPostsInThreadTitle.length() > 0) totalPosts = 
totalPostsInThreadTitle.split(" ")[2]; // Remove page from the link String realLink = Utils.removePageFromLink(link); if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) { String txt = repliesText.getElementsByClass("alt2").attr("title"); String splitter[] = txt.split(" ", 4); postCount = splitter[1].substring(0, splitter[1].length() - 1); views = splitter[3]; try { if (this.isNewTopicActivity) forum = thread.select("td[class=alt1]").last().text(); } catch (Exception e) { } formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "", preString.length() == 0 ? "" : preString + " ", threadLinkEl.text()); } threadUser = threaduser.text(); lastUser = repliesText.select("a[href*=members]").text(); threadLink = threadLinkEl.attr("href"); } // Add our thread to our list as long as the thread // contains a title if (!formattedTitle.equals("")) { ThreadModel tv = new ThreadModel(); tv.setTitle(formattedTitle); tv.setStartUser(threadUser); tv.setLastUser(lastUser); tv.setLink(threadLink); tv.setLastLink(lastPage); tv.setPostCount(postCount); tv.setMyPosts(totalPosts); tv.setViewCount(views); tv.setLocked(isLocked); tv.setSticky(isSticky); tv.setAnnouncement(isAnnounce); tv.setPoll(isPoll); tv.setHasAttachment(hasAttachment); tv.setForum(forum); tv.setLastPostTime(threadDate); threadlist.add(tv); } else if (thread.text() .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) { Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size())); if (threadlist.size() > 0) { ThreadModel ltv = threadlist.get(threadlist.size() - 1); Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle())); } if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext())) threadlist.add(new ThreadModel(true)); else { Log.d(TAG, "User Chose To Hide Old Threads"); break; } } } catch (Exception e) { Log.e(TAG, "Error Parsing That Thread...", e); Log.d(TAG, "Thread may have 
moved"); } } }
From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageInboxFragment.java
/** * Construct view by grabbing all private messages. This is only done * if the view is called for the first time. If there was a savedinstance * of the view then this is not called/*from w w w. jav a 2s . co m*/ */ private void constructView() { this.showOutbound = getArguments().getBoolean(showOutboundExtra, false); AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() { @Override protected void onPreExecute() { loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading), getString(R.string.pleaseWait), true); } @Override protected Void doInBackground(Void... params) { Document doc = VBForumFactory.getInstance().get(getActivity(), showOutbound ? WebUrls.pmSentUrl : WebUrls.pmInboxUrl); if (doc != null) { token = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); String current_month = getMonthForInt(0); Elements collapse = doc .select(showOutbound ? "tbody[id^=collapseobj_pmf-1]" : "tbody[id^=collapseobj_pmf0]"); publishProgress(getString(R.string.asyncDialogGrabPMs)); for (Element coll : collapse) { Elements trs = coll.select("tr"); for (Element tr : trs) { Elements alt1s = tr.getElementsByClass("alt1Active"); for (Element alt1 : alt1s) { Elements divs = alt1.select("div"); // First grab our link Elements linkElement = divs.get(0).select("a[rel=nofollow]"); String pmLink = linkElement.attr("href"); // There should be two divs here with text in it // the first is 'MM-DD-YYYY Subject' String dateSubject = divs.get(0).text(); String[] dateSubjectSplit = dateSubject.split(" ", 2); // The second is HH:MM AMPM User String timeTimeUser = divs.get(1).text(); String[] timeTimeUserSplit = timeTimeUser.split(" ", 3); // Create new pm PMModel pm = new PMModel(); pm.setDate(dateSubjectSplit[0]); // Check the month before we go further String this_month = getMonthForInt(Integer.parseInt(pm.getDate().split("-")[0])); if (!current_month.equals(this_month)) { current_month = this_month; PMModel pm_m = new PMModel(); 
pm_m.setTitle(String.format("%s - %s", this_month, showOutbound ? getResources().getString(R.string.inboxSent) : getResources().getString(R.string.inboxInbox))); pmlist.add(pm_m); } pm.setTime(timeTimeUserSplit[0] + timeTimeUserSplit[1]); pm.setTitle(dateSubjectSplit[1]); pm.setUser(timeTimeUserSplit[2]); pm.setLink(pmLink); pm.setToken(token); Log.v(TAG, "Adding PM From: " + pm.getUser()); pmlist.add(pm); } } } updateList(); } return null; } @Override protected void onProgressUpdate(String... progress) { if (loadingDialog != null) loadingDialog.setMessage(progress[0]); } @Override protected void onPostExecute(Void result) { loadingDialog.dismiss(); } }; updaterTask.execute(); }
From source file:im.ene.lab.attiq.ui.activities.ItemDetailActivity.java
/**
 * Renders the comments delivered by {@code event} into the comments web view, or hides
 * the view and shows a zero count when there are none.
 *
 * <p>Each comment is rendered from the {@code html/comment.html} asset template and
 * appended to the {@code content} element of {@code html/comments.html}. The edit block
 * of a comment is removed unless the comment belongs to the current profile.
 *
 * @param event carries the list of comments to display
 */
@SuppressWarnings("unused")
public void onEventMainThread(ItemCommentsEvent event) {
    if (!UIUtil.isEmpty(event.comments)) {
        mCommentsView.setVisibility(View.VISIBLE);
        List<Comment> comments = event.comments;

        mCommentCount.setText(comments.size() + "");

        String info = comments.size() == 1 ? getString(R.string.comment_singular)
                : getString(R.string.comment_plural);
        // FIXME should use plural strings
        mCommentInfo.setText(getString(R.string.article_comment, comments.size(), info));

        try {
            String html = IOUtil.readAssets("html/comments.html");
            Document fullBody = Jsoup.parse(html);
            Element content = fullBody.getElementById("content");

            // The per-comment template is the same for every comment; read it once.
            String commentTemplate = IOUtil.readAssets("html/comment.html");

            for (Comment comment : comments) {
                String commentHtml = commentTemplate
                        .replace("{user_icon_url}", comment.getUser().getProfileImageUrl())
                        .replace("{user_name}", comment.getUser().getId())
                        .replace("{comment_time}", TimeUtil.commentTime(comment.getCreatedAt()))
                        .replace("{article_uuid}", mItemUuid)
                        .replace("{comment_id}", comment.getId());

                Document commentDoc = Jsoup.parse(commentHtml);
                Element eComment = commentDoc.getElementsByClass("comment-box").first();
                eComment.getElementsByClass("message").first().append(comment.getRenderedBody());

                // remove comment edit block if it is not from current user
                if (mMyProfile == null || !mMyProfile.getId().equals(comment.getUser().getId())) {
                    String commentId = "comment_{comment_id}_{user_name}"
                            .replace("{comment_id}", comment.getId())
                            .replace("{user_name}", comment.getUser().getId());
                    Element commentEditor = commentDoc.getElementById(commentId);
                    // getElementById yields null when the template has no such block.
                    if (commentEditor != null) {
                        commentEditor.remove();
                    }
                }

                content.appendChild(eComment);
            }

            String result = fullBody.outerHtml();
            mCommentsView.loadDataWithBaseURL("http://qiita.com/", result, null, null, null);
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        mCommentCount.setText("0");
        mCommentInfo.setText(getString(R.string.article_comment, 0, getString(R.string.comment_plural)));
        mCommentsView.setVisibility(View.GONE);
    }
}
From source file:im.ene.lab.attiq.ui.activities.ProfileActivity.java
@SuppressWarnings("unused") public void onEventMainThread(DocumentEvent event) { if (event.document != null) { Elements stats = event.document.getElementsByClass("userActivityChart_stats"); Element statBlock;//from ww w . j a va 2 s . com if (!UIUtil.isEmpty(stats) && (statBlock = stats.first()) != null) { Elements statElements = statBlock.children(); Integer contribution = null; for (Element element : statElements) { String unit = element.getElementsByClass("userActivityChart_statUnit").text(); if ("Contribution".equals(unit.trim())) { try { contribution = Integer .valueOf(element.getElementsByClass("userActivityChart_statCount").text()); } catch (NumberFormatException er) { er.printStackTrace(); } break; } } if (contribution != null) { ((State) mState).contribution = contribution; EventBus.getDefault().post(new StateEvent<>(getClass().getSimpleName(), true, null, mState)); } } } }
From source file:org.keionline.keionline.ArticleView.java
private String getContent(String url) throws IOException { Document doc = Jsoup.connect(url).userAgent("Mozilla").get(); Element data = doc.getElementsByClass("node").first();// get the third content div, Elements select = data.select("img"); // Change the links to absolute!! so that images work for (Element e : select) { e.attr("src", e.absUrl("src")); }// w ww . j a v a 2 s.c o m select = data.select("a"); for (Element e : select) { e.attr("href", e.absUrl("href")); } Element info = data.getElementsByClass("submitted").first(); info.after("<hr>"); String cont = data.toString(); cont = CSS + cont + "</body>"; content = cont; return cont; }
From source file:org.loklak.api.search.WordpressCrawlerService.java
public static SusiThought crawlWordpress(String blogURL) { Document blogHTML = null;//from ww w .j a v a 2s.co m Elements articles = null; Elements articleList_title = null; Elements articleList_content = null; Elements articleList_dateTime = null; Elements articleList_author = null; String[][] blogPosts = new String[100][4]; // blogPosts[][0] = Blog Title // blogPosts[][1] = Posted On // blogPosts[][2] = Author // blogPosts[][3] = Blog Content Integer numberOfBlogs = 0; Integer iterator = 0; try { blogHTML = Jsoup.connect(blogURL).get(); } catch (IOException e) { e.printStackTrace(); } articles = blogHTML.getElementsByTag("article"); iterator = 0; for (Element article : articles) { articleList_title = article.getElementsByClass("entry-title"); for (Element blogs : articleList_title) { blogPosts[iterator][0] = blogs.text().toString(); } articleList_dateTime = article.getElementsByClass("posted-on"); for (Element blogs : articleList_dateTime) { blogPosts[iterator][1] = blogs.text().toString(); } articleList_author = article.getElementsByClass("byline"); for (Element blogs : articleList_author) { blogPosts[iterator][2] = blogs.text().toString(); } articleList_content = article.getElementsByClass("entry-content"); for (Element blogs : articleList_content) { blogPosts[iterator][3] = blogs.text().toString(); } iterator++; } numberOfBlogs = iterator; JSONArray blog = new JSONArray(); for (int k = 0; k < numberOfBlogs; k++) { JSONObject blogpost = new JSONObject(); blogpost.put("blog_url", blogURL); blogpost.put("title", blogPosts[k][0]); blogpost.put("posted_on", blogPosts[k][1]); blogpost.put("author", blogPosts[k][2]); blogpost.put("content", blogPosts[k][3]); blog.put(blogpost); } SusiThought json = new SusiThought(); json.setData(blog); return json; }
From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java
@Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("getMetadata() " + options.toString()); // check if there is a md in the result if (options.getResult() != null && options.getResult().getMetadata() != null) { LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult()); return options.getResult().getMetadata(); }//from w w w . ja v a 2s. c om MediaMetadata md = new MediaMetadata(providerInfo.getId()); String imdbId = ""; // imdbId from searchResult if (options.getResult() != null) { imdbId = options.getResult().getIMDBId(); } // imdbid from scraper option if (!MetadataUtil.isValidImdbId(imdbId)) { imdbId = options.getImdbId(); } if (!MetadataUtil.isValidImdbId(imdbId)) { return md; } LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId); md.setId(MediaMetadata.IMDBID, imdbId); ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor); ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>( executor); // worker for imdb request (/combined) (everytime from akas.imdb.com) // StringBuilder sb = new StringBuilder(imdbSite.getSite()); StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/combined"); Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2()); Future<Document> futureCombined = compSvcImdb.submit(worker); // worker for imdb request (/plotsummary) (from chosen site) Future<Document> futurePlotsummary = null; sb = new StringBuilder(imdbSite.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/plotsummary"); worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2()); futurePlotsummary = compSvcImdb.submit(worker); // worker for tmdb request Future<MediaMetadata> futureTmdb = null; if (options.isScrapeImdbForeignLanguage() || 
options.isScrapeCollectionInfo()) { Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry()); futureTmdb = compSvcTmdb.submit(worker2); } Document doc; doc = futureCombined.get(); /* * title and year have the following structure * * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div> */ // parse title and year Element title = doc.getElementById("tn15title"); if (title != null) { Element element = null; // title Elements elements = title.getElementsByTag("h1"); if (elements.size() > 0) { element = elements.first(); String movieTitle = cleanString(element.ownText()); md.storeMetadata(MediaMetadata.TITLE, movieTitle); } // year elements = title.getElementsByTag("span"); if (elements.size() > 0) { element = elements.first(); String content = element.text(); // search year Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)"); Matcher matcher = yearPattern.matcher(content); while (matcher.find()) { if (matcher.group(1) != null) { String movieYear = matcher.group(1); md.storeMetadata(MediaMetadata.YEAR, movieYear); break; } } } // original title elements = title.getElementsByAttributeValue("class", "title-extra"); if (elements.size() > 0) { element = elements.first(); String content = element.text(); content = content.replaceAll("\\(original title\\)", "").trim(); md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content); } } // poster Element poster = doc.getElementById("primary-poster"); if (poster != null) { String posterUrl = poster.attr("src"); posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_"); posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_"); processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl); } /* * <div class="starbar-meta"> <b>7.4/10</b> <a href="ratings" class="tn15more">52,871 votes</a> » </div> */ // rating and rating 
count Element ratingElement = doc.getElementById("tn15rating"); if (ratingElement != null) { Elements elements = ratingElement.getElementsByClass("starbar-meta"); if (elements.size() > 0) { Element div = elements.get(0); // rating comes in <b> tag Elements b = div.getElementsByTag("b"); if (b.size() == 1) { String ratingAsString = b.text(); Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10"); Matcher matcher = ratingPattern.matcher(ratingAsString); while (matcher.find()) { if (matcher.group(1) != null) { float rating = 0; try { rating = Float.valueOf(matcher.group(1)); } catch (Exception e) { } md.storeMetadata(MediaMetadata.RATING, rating); break; } } } // count Elements a = div.getElementsByAttributeValue("href", "ratings"); if (a.size() == 1) { String countAsString = a.text().replaceAll("[.,]|votes", "").trim(); int voteCount = 0; try { voteCount = Integer.parseInt(countAsString); } catch (Exception e) { } md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount); } } // top250 elements = ratingElement.getElementsByClass("starbar-special"); if (elements.size() > 0) { Elements a = elements.get(0).getElementsByTag("a"); if (a.size() > 0) { Element anchor = a.get(0); Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})"); Matcher matcher = topPattern.matcher(anchor.ownText()); while (matcher.find()) { if (matcher.group(1) != null) { int top250 = 0; try { top250 = Integer.parseInt(matcher.group(1)); } catch (Exception e) { } md.storeMetadata(MediaMetadata.TOP_250, top250); } } } } } // parse all items coming by <div class="info"> Elements elements = doc.getElementsByClass("info"); for (Element element : elements) { // only parse divs if (!"div".equals(element.tag().getName())) { continue; } // elements with h5 are the titles of the values Elements h5 = element.getElementsByTag("h5"); if (h5.size() > 0) { Element firstH5 = h5.first(); String h5Title = firstH5.text(); // release date /* * <div class="info"><h5>Release Date:</h5><div 
class="info-content">5 January 1996 (USA)<a class="tn15more inline" * href="/title/tt0114746/releaseinfo" * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a> </div></div> */ if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Element releaseDateElement = div.first(); String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", "")); Pattern pattern = Pattern.compile("(.*)\\(.*\\)"); Matcher matcher = pattern.matcher(releaseDate); if (matcher.find()) { try { SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy"); Date parsedDate = sdf.parse(matcher.group(1)); sdf = new SimpleDateFormat("dd-MM-yyyy"); md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate)); } catch (Exception e) { } } } } /* * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline" * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See * more</a> » </div></div> */ // tagline if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*") && !options.isScrapeImdbForeignLanguage()) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Element taglineElement = div.first(); String tagline = cleanString(taglineElement.ownText().replaceAll("", "")); md.storeMetadata(MediaMetadata.TAGLINE, tagline); } } /* * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" 
href="/title/tt0472033/keywords" onClick= * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a> » </div> */ // genres are only scraped from akas.imdb.com if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Elements a = div.first().getElementsByTag("a"); for (Element anchor : a) { if (anchor.attr("href").matches("/Sections/Genres/.*")) { md.addGenre(getTmmGenre(anchor.ownText())); } } } } // } /* * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div> */ // runtime // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) { if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Element taglineElement = div.first(); String first = taglineElement.ownText().split("\\|")[0]; String runtimeAsString = cleanString(first.replaceAll("min", "")); int runtime = 0; try { runtime = Integer.parseInt(runtimeAsString); } catch (Exception e) { // try to filter out the first number we find Pattern runtimePattern = Pattern.compile("([0-9]{2,3})"); Matcher matcher = runtimePattern.matcher(runtimeAsString); if (matcher.find()) { runtime = Integer.parseInt(matcher.group(0)); } } md.storeMetadata(MediaMetadata.RUNTIME, runtime); } } /* * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div> */ // country if (h5Title.matches("(?i)Country.*")) { Elements a = element.getElementsByTag("a"); String countries = ""; for (Element anchor : a) { Pattern pattern = Pattern.compile("/country/(.*)"); Matcher matcher = pattern.matcher(anchor.attr("href")); if (matcher.matches()) { String country = matcher.group(1); 
if (StringUtils.isNotEmpty(countries)) { countries += ", "; } countries += country.toUpperCase(); } } md.storeMetadata(MediaMetadata.COUNTRY, countries); } /* * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div> */ // Spoken languages if (h5Title.matches("(?i)Language.*")) { Elements a = element.getElementsByTag("a"); String spokenLanguages = ""; for (Element anchor : a) { Pattern pattern = Pattern.compile("/language/(.*)"); Matcher matcher = pattern.matcher(anchor.attr("href")); if (matcher.matches()) { String langu = matcher.group(1); if (StringUtils.isNotEmpty(spokenLanguages)) { spokenLanguages += ", "; } spokenLanguages += langu; } } md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages); } /* * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div> */ // certification // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) { if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) { Elements a = element.getElementsByTag("a"); for (Element anchor : a) { // certification for the right country if (anchor.attr("href").matches( "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) { Pattern certificationPattern = Pattern.compile(".*:(.*)"); Matcher matcher = certificationPattern.matcher(anchor.ownText()); Certification certification = null; while (matcher.find()) { if (matcher.group(1) != null) { certification = Certification.getCertification(options.getCountry(), 
matcher.group(1)); } } if (certification != null) { md.addCertification(certification); break; } } } } } /* * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick= * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div> */ // director if ("director-info".equals(element.id())) { Elements a = element.getElementsByTag("a"); for (Element anchor : a) { if (anchor.attr("href").matches("/name/nm.*")) { MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR); cm.setName(anchor.ownText()); md.addCastMember(cm); } } } } /* * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick= * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src= * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src= * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32" * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick= * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... 
</td><td * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick= * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src= * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32" * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick= * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table> */ // cast elements = doc.getElementsByClass("cast"); if (elements.size() > 0) { Elements tr = elements.get(0).getElementsByTag("tr"); for (Element row : tr) { Elements td = row.getElementsByTag("td"); MediaCastMember cm = new MediaCastMember(); for (Element column : td) { // actor thumb if (column.hasClass("hs")) { Elements img = column.getElementsByTag("img"); if (img.size() > 0) { String thumbUrl = img.get(0).attr("src"); if (thumbUrl.contains("no_photo.png")) { cm.setImageUrl(""); } else { thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_"); thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", ""); cm.setImageUrl(thumbUrl); } } } // actor name if (column.hasClass("nm")) { cm.setName(cleanString(column.text())); } // character if (column.hasClass("char")) { cm.setCharacter(cleanString(column.text())); } } if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) { cm.setType(CastType.ACTOR); md.addCastMember(cm); } } } Element content = doc.getElementById("tn15content"); if (content != null) { elements = content.getElementsByTag("table"); for (Element table : elements) { // writers if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) { Elements anchors = table.getElementsByTag("a"); for (Element anchor : anchors) { if (anchor.attr("href").matches("/name/nm.*")) { 
MediaCastMember cm = new MediaCastMember(CastType.WRITER); cm.setName(anchor.ownText()); md.addCastMember(cm); } } } // producers if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) { Elements rows = table.getElementsByTag("tr"); for (Element row : rows) { if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) { continue; } Elements columns = row.children(); if (columns.size() == 0) { continue; } MediaCastMember cm = new MediaCastMember(CastType.PRODUCER); String name = cleanString(columns.get(0).text()); if (StringUtils.isBlank(name)) { continue; } cm.setName(name); if (columns.size() >= 3) { cm.setPart(cleanString(columns.get(2).text())); } md.addCastMember(cm); } } } } // Production companies elements = doc.getElementsByClass("blackcatheader"); for (Element blackcatheader : elements) { if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) { Elements a = blackcatheader.nextElementSibling().getElementsByTag("a"); StringBuilder productionCompanies = new StringBuilder(); for (Element anchor : a) { if (StringUtils.isNotEmpty(productionCompanies)) { productionCompanies.append(", "); } productionCompanies.append(anchor.ownText()); } md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString()); break; } } /* * plot from /plotsummary */ // build the url doc = null; doc = futurePlotsummary.get(); // imdb.com has another site structure if (imdbSite == ImdbSiteDefinition.IMDB_COM) { Elements zebraList = doc.getElementsByClass("zebraList"); if (zebraList != null && !zebraList.isEmpty()) { Elements odd = zebraList.get(0).getElementsByClass("odd"); if (odd.isEmpty()) { odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even } if (odd.size() > 0) { Elements p = odd.get(0).getElementsByTag("p"); if (p.size() > 0) { String plot = cleanString(p.get(0).ownText()); md.storeMetadata(MediaMetadata.PLOT, plot); } } } } else { Element wiki = doc.getElementById("swiki.2.1"); 
if (wiki != null) { String plot = cleanString(wiki.ownText()); md.storeMetadata(MediaMetadata.PLOT, plot); } } // title also from chosen site if we are not scraping akas.imdb.com if (imdbSite != ImdbSiteDefinition.IMDB_COM) { title = doc.getElementById("tn15title"); if (title != null) { Element element = null; // title elements = title.getElementsByClass("main"); if (elements.size() > 0) { element = elements.first(); String movieTitle = cleanString(element.ownText()); md.storeMetadata(MediaMetadata.TITLE, movieTitle); } } } // } // get data from tmdb? if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) { MediaMetadata tmdbMd = futureTmdb.get(); if (options.isScrapeImdbForeignLanguage() && tmdbMd != null && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) { // tmdbid md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID)); // title md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE)); // original title md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE)); // tagline md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE)); // plot md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT)); // collection info md.storeMetadata(MediaMetadata.COLLECTION_NAME, tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME)); md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET)); } if (options.isScrapeCollectionInfo() && tmdbMd != null) { md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET)); md.storeMetadata(MediaMetadata.COLLECTION_NAME, tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME)); } } // if we have still no original title, take the title if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) { md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE)); } return md; }
From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java
@Override public List<MediaSearchResult> search(MediaSearchOptions query) throws Exception { LOGGER.debug("search() " + query.toString()); /*/*from w w w .j av a2 s.c om*/ * IMDb matches seem to come in several "flavours". * * Firstly, if there is one exact match it returns the matching IMDb page. * * If that fails to produce a unique hit then a list of possible matches are returned categorised as: Popular Titles (Displaying ? Results) Titles * (Exact Matches) (Displaying ? Results) Titles (Partial Matches) (Displaying ? Results) * * We should check the Exact match section first, then the poplar titles and finally the partial matches. * * Note: That even with exact matches there can be more than 1 hit, for example "Star Trek" */ Pattern imdbIdPattern = Pattern.compile("/title/(tt[0-9]{7})/"); List<MediaSearchResult> result = new ArrayList<MediaSearchResult>(); String searchTerm = ""; if (StringUtils.isNotEmpty(query.get(SearchParam.IMDBID))) { searchTerm = query.get(SearchParam.IMDBID); } if (StringUtils.isEmpty(searchTerm)) { searchTerm = query.get(SearchParam.QUERY); } if (StringUtils.isEmpty(searchTerm)) { searchTerm = query.get(SearchParam.TITLE); } if (StringUtils.isEmpty(searchTerm)) { return result; } // parse out language and coutry from the scraper options String language = query.get(SearchParam.LANGUAGE); String myear = query.get(SearchParam.YEAR); String country = query.get(SearchParam.COUNTRY); // for passing the country to the scrape searchTerm = MetadataUtil.removeNonSearchCharacters(searchTerm); StringBuilder sb = new StringBuilder(imdbSite.getSite()); sb.append("find?q="); try { // search site was everytime in UTF-8 sb.append(URLEncoder.encode(searchTerm, "UTF-8")); } catch (UnsupportedEncodingException ex) { // Failed to encode the movie name for some reason! 
LOGGER.debug("Failed to encode search term: " + searchTerm); sb.append(searchTerm); } // we need to search for all - otherwise we do not find TV movies sb.append(CAT_TITLE); LOGGER.debug("========= BEGIN IMDB Scraper Search for: " + sb.toString()); Document doc; try { CachedUrl url = new CachedUrl(sb.toString()); url.addHeader("Accept-Language", getAcceptLanguage(language, country)); doc = Jsoup.parse(url.getInputStream(), "UTF-8", ""); } catch (Exception e) { LOGGER.debug("tried to fetch search response", e); // clear Cache CachedUrl.removeCachedFileForUrl(sb.toString()); return result; } // check if it was directly redirected to the site Elements elements = doc.getElementsByAttributeValue("rel", "canonical"); for (Element element : elements) { MediaMetadata md = null; // we have been redirected to the movie site String movieName = null; String movieId = null; String href = element.attr("href"); Matcher matcher = imdbIdPattern.matcher(href); while (matcher.find()) { if (matcher.group(1) != null) { movieId = matcher.group(1); } } // get full information if (!StringUtils.isEmpty(movieId)) { MediaScrapeOptions options = new MediaScrapeOptions(); options.setImdbId(movieId); options.setLanguage(MediaLanguages.valueOf(language)); options.setCountry(CountryCode.valueOf(country)); options.setScrapeCollectionInfo(Boolean.parseBoolean(query.get(SearchParam.COLLECTION_INFO))); options.setScrapeImdbForeignLanguage( Boolean.parseBoolean(query.get(SearchParam.IMDB_FOREIGN_LANGUAGE))); md = getMetadata(options); if (!StringUtils.isEmpty(md.getStringValue(MediaMetadata.TITLE))) { movieName = md.getStringValue(MediaMetadata.TITLE); } } // if a movie name/id was found - return it if (StringUtils.isNotEmpty(movieName) && StringUtils.isNotEmpty(movieId)) { MediaSearchResult sr = new MediaSearchResult(providerInfo.getId()); sr.setTitle(movieName); sr.setIMDBId(movieId); sr.setYear(md.getStringValue(MediaMetadata.YEAR)); sr.setMetadata(md); sr.setScore(1); // and parse out the poster 
String posterUrl = ""; Element td = doc.getElementById("img_primary"); if (td != null) { Elements imgs = td.getElementsByTag("img"); for (Element img : imgs) { posterUrl = img.attr("src"); posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_"); posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_"); posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", ""); } } if (StringUtils.isNotBlank(posterUrl)) { sr.setPosterUrl(posterUrl); } result.add(sr); return result; } } // parse results // elements = doc.getElementsByClass("result_text"); elements = doc.getElementsByClass("findResult"); for (Element tr : elements) { // we only want the tr's if (!"tr".equalsIgnoreCase(tr.tagName())) { continue; } // find the id / name String movieName = ""; String movieId = ""; String year = ""; Elements tds = tr.getElementsByClass("result_text"); for (Element element : tds) { // we only want the td's if (!"td".equalsIgnoreCase(element.tagName())) { continue; } // filter out unwanted results Pattern unwanted = Pattern.compile(".*\\((TV Series|TV Episode|Short|Video Game)\\).*"); // stripped out .*\\(Video\\).*| Matcher matcher = unwanted.matcher(element.text()); if (matcher.find()) { continue; } // is there a localized name? 
(aka) String localizedName = ""; Elements italics = element.getElementsByTag("i"); if (italics.size() > 0) { localizedName = italics.text().replace("\"", ""); } // get the name inside the link Elements anchors = element.getElementsByTag("a"); for (Element a : anchors) { if (StringUtils.isNotEmpty(a.text())) { // movie name if (StringUtils.isNotBlank(localizedName) && !language.equals("en")) { // take AKA as title, but only if not EN movieName = localizedName; } else { movieName = a.text(); } // parse id String href = a.attr("href"); matcher = imdbIdPattern.matcher(href); while (matcher.find()) { if (matcher.group(1) != null) { movieId = matcher.group(1); } } // try to parse out the year Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)"); matcher = yearPattern.matcher(element.text()); while (matcher.find()) { if (matcher.group(1) != null) { year = matcher.group(1); break; } } break; } } } // if an id/name was found - parse the poster image String posterUrl = ""; tds = tr.getElementsByClass("primary_photo"); for (Element element : tds) { Elements imgs = element.getElementsByTag("img"); for (Element img : imgs) { posterUrl = img.attr("src"); posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_"); posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_"); posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", ""); } } // if no movie name/id was found - continue if (StringUtils.isEmpty(movieName) || StringUtils.isEmpty(movieId)) { continue; } MediaSearchResult sr = new MediaSearchResult(providerInfo.getId()); sr.setTitle(movieName); sr.setIMDBId(movieId); sr.setYear(year); sr.setPosterUrl(posterUrl); // populate extra args MetadataUtil.copySearchQueryToSearchResult(query, sr); if (movieId.equals(query.get(SearchParam.IMDBID))) { // perfect match sr.setScore(1); } else { // compare score based on names float score = MetadataUtil.calculateScore(searchTerm, movieName); if (posterUrl.isEmpty() || posterUrl.contains("nopicture")) { 
LOGGER.debug("no poster - downgrading score by 0.01"); score = score - 0.01f; } if (myear != null && !myear.isEmpty() && !myear.equals("0") && !myear.equals(year)) { LOGGER.debug("parsed year does not match search result year - downgrading score by 0.01"); score = score - 0.01f; } sr.setScore(score); } result.add(sr); // only get 40 results if (result.size() >= 40) { break; } } Collections.sort(result); Collections.reverse(result); return result; }