Example usage for org.jsoup.nodes Element getElementsByClass

List of usage examples for org.jsoup.nodes Element getElementsByClass

Introduction

This page collects example usages of org.jsoup.nodes.Element.getElementsByClass.

Prototype

public Elements getElementsByClass(String className) 

Source Link

Document

Find elements that have this class, including or under this element.

Usage

From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java

/**
 * Refreshes the locally cached account data by logging in to the 2degrees customer
 * portal and scraping the account summary page.
 *
 * <p>Unless {@code force} is set, the update is skipped when the stored preferences
 * (previous login failure, automatic updates, background data, auto sync, wifi,
 * 2Degrees data, roaming) disallow it. On success the "cache" table is replaced with
 * the freshly scraped rows and the update date is stored in the shared preferences.
 *
 * @param context context used for preferences, connectivity checks, logging and the database
 * @param force   when {@code true}, the preference-based checks are bypassed
 * @return the outcome of the update, or {@code null} when the login page could not be
 *         retrieved (kept for backward compatibility with existing callers)
 */
public FetchResult updateData(Context context, boolean force) {
    //Open database
    DBOpenHelper dbhelper = new DBOpenHelper(context);
    SQLiteDatabase db = dbhelper.getWritableDatabase();

    // The whole update runs inside one try/finally so that the database is closed on
    // every exit path, including the early "Skipping Update" returns.
    try {
        // check for internet connectivity
        try {
            if (!isOnline(context)) {
                Log.d(TAG, "We do not seem to be online. Skipping Update.");
                return FetchResult.NOTONLINE;
            }
        } catch (Exception e) {
            exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()");
        }

        SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context);
        if (force) {
            Log.d(TAG, "Update Forced");
        } else {
            FetchResult skipReason = findReasonToSkipUpdate(context, sp);
            if (skipReason != null) {
                return skipReason;
            }
        }
        return fetchAndCacheAccountData(context, sp, db);
    } catch (ClientProtocolException e) {
        DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage());
        return FetchResult.NETWORKERROR;
    } catch (IOException e) {
        DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage());
        return FetchResult.NETWORKERROR;
    } finally {
        db.close();
    }
}

/**
 * Checks the stored preferences and the current connection type to decide whether an
 * automatic (non-forced) update is allowed.
 *
 * @return the result to report when the update must be skipped, or {@code null} when the
 *         update may proceed; a failure while checking is reported and treated as "proceed"
 */
private FetchResult findReasonToSkipUpdate(Context context, SharedPreferences sp) {
    try {
        if (sp.getBoolean("loginFailed", false)) {
            Log.d(TAG, "Previous login failed. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update.");
            return FetchResult.LOGINFAILED;
        }
        if (!sp.getBoolean("autoupdates", true)) {
            Log.d(TAG, "Automatic updates not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) {
            Log.d(TAG, "Background data not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true)
                && sp.getBoolean("obeyBackgroundData", true)) {
            Log.d(TAG, "Auto sync not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (isWifi(context)) {
            if (!sp.getBoolean("wifiUpdates", true)) {
                Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update");
                DBLog.insertMessage(context, "i", TAG,
                        "On wifi, and wifi auto updates not allowed. Skipping Update");
                return FetchResult.NOTALLOWED;
            }
        } else {
            Log.d(TAG, "We are not on wifi.");
            boolean roaming = isRoaming(context);
            if (!roaming && !sp.getBoolean("2DData", true)) {
                Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                DBLog.insertMessage(context, "i", TAG,
                        "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                return FetchResult.NOTALLOWED;
            } else if (roaming && !sp.getBoolean("roamingData", false)) {
                Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                DBLog.insertMessage(context, "i", TAG,
                        "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                return FetchResult.NOTALLOWED;
            }
        }
    } catch (Exception e) {
        exceptionReporter.reportException(Thread.currentThread(), e,
                "Exception while finding if to update.");
    }
    return null;
}

/**
 * Logs in, downloads the account home page and rewrites the "cache" table from it.
 * Bill-summary details for post-paid accounts are intentionally not collected.
 *
 * @return the outcome of the update, or {@code null} when the login page could not be retrieved
 * @throws IOException when one of the HTTP requests fails
 */
private FetchResult fetchAndCacheAccountData(Context context, SharedPreferences sp, SQLiteDatabase db)
        throws IOException {
    String username = sp.getString("username", null);
    String password = sp.getString("password", null);
    if (username == null || password == null) {
        DBLog.insertMessage(context, "i", TAG, "Username or password not set.");
        return FetchResult.USERNAMEPASSWORDNOTSET;
    }

    // Find the URL of the page to send login data to.
    Log.d(TAG, "Finding Action. ");
    String loginUrl = "https://secure.2degreesmobile.co.nz/web/ip/login";
    HttpGetter loginPageGet = new HttpGetter(loginUrl);
    String loginPageString = loginPageGet.execute();
    if (loginPageString == null) {
        // Historical behaviour: no result is reported when the login page is unavailable.
        return null;
    }
    Document loginPage = Jsoup.parse(loginPageString, loginUrl);
    Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first();
    if (loginForm == null) {
        // first() yields null when nothing matched; without the form there is no action URL.
        DBLog.insertMessage(context, "w", TAG, "Login form not found on login page.");
        return FetchResult.NETWORKERROR;
    }
    String loginAction = loginForm.attr("action");

    // Send login form
    List<NameValuePair> loginValues = new ArrayList<NameValuePair>();
    loginValues.add(new BasicNameValuePair("externalURLRedirect", ""));
    loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin"));
    loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M"));
    loginValues.add(new BasicNameValuePair("hdnlocale", ""));
    loginValues.add(new BasicNameValuePair("userid", username));
    loginValues.add(new BasicNameValuePair("password", password));
    Log.d(TAG, "Sending Login ");
    HttpPoster sendLoginPoster = new HttpPoster(loginAction, loginValues);

    // Parse result
    String loginResponse = sendLoginPoster.execute();
    if (loginResponse == null) {
        DBLog.insertMessage(context, "w", TAG, "No response to login request.");
        return FetchResult.NETWORKERROR;
    }
    Document loginResponseParsed = Jsoup.parse(loginResponse);

    // Determine if this is a pre-pay or post-paid account.
    boolean postPaid = loginResponseParsed
            .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") != null;
    Log.d(TAG, postPaid ? "Post-paid account." : "Pre-pay account or no account.");

    String homepageUrl = postPaid ? "https://secure.2degreesmobile.co.nz/group/ip/postpaid"
            : "https://secure.2degreesmobile.co.nz/group/ip/home";
    HttpGetter homepageGetter = new HttpGetter(homepageUrl);
    String homepageHTML = homepageGetter.execute();
    if (homepageHTML == null) {
        DBLog.insertMessage(context, "w", TAG, "Account home page could not be retrieved.");
        return FetchResult.NETWORKERROR;
    }
    Document homePage = Jsoup.parse(homepageHTML);

    Element accountSummary = homePage.getElementById("accountSummary");
    if (accountSummary == null) {
        Log.d(TAG, "Login failed.");
        return FetchResult.LOGINFAILED;
    }
    // Locate the table before clearing the cache so that an unexpected page layout does
    // not wipe the previously cached data.
    Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first();
    if (accountSummaryTable == null) {
        DBLog.insertMessage(context, "w", TAG, "Account summary table not found.");
        return FetchResult.NETWORKERROR;
    }
    db.delete("cache", "", null);

    // The expiry wording differs between pre-pay and post-paid pages; compile it once.
    Pattern expiryPattern = Pattern.compile(postPaid
            ? "\\(([\\d\\.]*) ?\\w*? ?will expire on (.*)\\)"
            : "\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)");
    for (Element row : accountSummaryTable.getElementsByTag("tr")) {
        cacheAccountSummaryRow(row, expiryPattern, db);
    }

    if (!postPaid) {
        cacheEnabledValuePacks(context, db);
    }

    SharedPreferences.Editor prefedit = sp.edit();
    Date now = new Date();
    prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now));
    prefedit.putBoolean("loginFailed", false);
    prefedit.putBoolean("networkError", false);
    prefedit.commit();
    DBLog.insertMessage(context, "i", TAG, "Update Successful");
    return FetchResult.SUCCESS;
}

/**
 * Parses one row of the account summary table and inserts it into the "cache" table.
 * Rows without a detail cell containing a {@code <strong>} name are skipped.
 */
private void cacheAccountSummaryRow(Element row, Pattern expiryPattern, SQLiteDatabase db) {
    Element details = row.getElementsByClass("tableBilldetail").first();
    Element nameElement = details == null ? null : details.getElementsByTag("strong").first();
    if (nameElement == null) {
        return;
    }
    String name = nameElement.text();

    // Amount cell: "<value>&nbsp;<units>"; both stay null when the cell is missing.
    Double value = null;
    String units = null;
    Element amount = row.getElementsByClass("tableBillamount").first();
    if (amount != null) {
        String[] amountParts = amount.html().split("&nbsp;", 2);
        if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need")
                || amountParts[0].equals("Unlimited Text*")) {
            value = Values.INCLUDED;
        } else {
            try {
                value = Double.parseDouble(amountParts[0]);
            } catch (NumberFormatException e) {
                exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value.");
                value = 0.0;
            }
        }
        if (amountParts.length > 1) {
            units = amountParts[1];
        }
    }

    Element expires = details.getElementsByTag("em").first();
    String expiresDetails = expires != null ? expires.text() : "";
    Log.d(TAG, expiresDetails);

    // Group 1 is the expiring amount, group 2 the expiry date text.
    Double expiresValue = null;
    String expiresDate = null;
    Matcher matcher = expiryPattern.matcher(expiresDetails);
    if (matcher.find()) {
        try {
            expiresValue = Double.parseDouble(matcher.group(1));
        } catch (NumberFormatException e) {
            expiresValue = null;
        }
        String expiresDateString = matcher.group(2);
        if (expiresDateString != null && expiresDateString.length() > 0) {
            try {
                Date expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString);
                expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj);
            } catch (java.text.ParseException e) {
                Log.d(TAG, "Could not parse date: " + expiresDateString);
            }
        }
    }

    ContentValues values = new ContentValues();
    values.put("name", name);
    values.put("value", value);
    values.put("units", units);
    values.put("expires_value", expiresValue);
    values.put("expires_date", expiresDate);
    db.insert("cache", "value", values);
}

/**
 * Downloads the pre-pay value pack page and, for every enabled pack that is known in
 * {@code Values.valuePacks}, stores its start amount and plan name on the matching cache rows.
 *
 * @throws IOException when the value pack page request fails
 */
private void cacheEnabledValuePacks(Context context, SQLiteDatabase db) throws IOException {
    Log.d(TAG, "Getting Value packs...");
    // Find value packs
    HttpGetter valuePacksPageGet = new HttpGetter(
            "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack");
    String valuePacksPageString = valuePacksPageGet.execute();
    if (valuePacksPageString == null) {
        return;
    }
    Document valuePacksPage = Jsoup.parse(valuePacksPageString);
    for (Element enabledPack : valuePacksPage.getElementsByClass("yellow")) {
        Element offerNameElemt = enabledPack
                .getElementsByAttributeValueStarting("name", "offername").first();
        if (offerNameElemt == null) {
            continue;
        }
        String offerName = offerNameElemt.val();
        DBLog.insertMessage(context, "d", "", "Got element: " + offerName);
        ValuePack[] packs = Values.valuePacks.get(offerName);
        if (packs == null) {
            DBLog.insertMessage(context, "d", "", "Offer name: " + offerName + " not matched.");
            continue;
        }
        for (ValuePack pack : packs) {
            ContentValues values = new ContentValues();
            values.put("plan_startamount", pack.value);
            values.put("plan_name", offerName);
            DBLog.insertMessage(context, "d", "",
                    "Pack " + pack.type.id + " start value set to " + pack.value);
            // Bind the id as an argument instead of concatenating it into the WHERE clause.
            db.update("cache", values, "name = ?", new String[] { String.valueOf(pack.type.id) });
        }
    }
}

From source file:ca.zadrox.dota2esportticker.service.UpdateMatchService.java

/**
 * Scrapes the GosuGamers Dota 2 match listing, fetches every linked match page on a
 * thread pool and bulk-inserts the parsed series (and optionally results) through the
 * content resolver. Broadcasts UPDATE_STARTED / UPDATE_COMPLETE (or UPDATE_NO_CONNECTIVITY)
 * to report progress.
 *
 * @param doResults when {@code true}, entries of the last match table (treated as the
 *                  results table) are fetched and stored as well
 */
private void updateMatches(boolean doResults) {

    if (!checkForConnectivity()) {
        LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(UPDATE_NO_CONNECTIVITY));
        return;
    }

    LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(UPDATE_STARTED));

    final String BASE_URL = "http://www.gosugamers.net/dota2/gosubet";
    final String MATCH_LINK_URL_BASE = "http://www.gosugamers.net";

    try {

        String rawHtml = new OkHttpClient().newCall(new Request.Builder().url(BASE_URL).build()).execute()
                .body().string();

        // Only the first column of the page contains the match tables. Fail with an
        // IOException (handled below) instead of an index error when the markers are missing.
        int sectionStart = rawHtml.indexOf("<div id=\"col1\" class=\"rows\">");
        int sectionEnd = rawHtml.indexOf("<div id=\"col2\" class=\"rows\">");
        if (sectionStart < 0 || sectionEnd < sectionStart) {
            throw new IOException("Unexpected page layout at " + BASE_URL);
        }
        Document doc = Jsoup.parse(rawHtml.substring(sectionStart, sectionEnd));

        Elements tables = doc.getElementsByClass("matches");

        ArrayList<ArrayList<String>> matchLinks = new ArrayList<ArrayList<String>>(tables.size());

        int numSeries = 0;
        for (Element table : tables) {
            Elements links = table.getElementsByClass("match");
            if (links.size() != 0) {
                ArrayList<String> innerMatchLink = new ArrayList<String>(links.size());
                for (Element link : links) {
                    innerMatchLink.add(MATCH_LINK_URL_BASE + link.attr("href"));
                    numSeries++;
                }
                matchLinks.add(innerMatchLink);
            }
        }

        // needed if there are massive reschedules to update content properly.
        Uri resultsUri = MatchContract.SeriesEntry.buildSeriesUriWithAfterTime(TimeUtils.getUTCTime());

        Cursor c = getContentResolver().query(resultsUri,
                new String[] { MatchContract.SeriesEntry.COLUMN_GG_MATCH_PAGE }, null, null, null);
        if (c != null) {
            try {
                // Stored links are merged into the first table's list; when no table was
                // found there is nothing to merge into.
                if (!matchLinks.isEmpty()) {
                    ArrayList<String> firstList = matchLinks.get(0);
                    while (c.moveToNext()) {
                        String storedLink = c.getString(0);
                        if (!firstList.contains(storedLink)) {
                            firstList.add(storedLink);
                        }
                    }
                }
            } finally {
                c.close();
            }
        }

        ExecutorService executorService = Executors.newFixedThreadPool(10);
        ArrayList<Future<BundledMatchItem>> seriesItemFutures = new ArrayList<Future<BundledMatchItem>>(
                numSeries);

        LogUtils.LOGD(TAG, "Starting Retrieval, num elements gathered: " + numSeries);
        int numSubmitted = 0;
        Iterator<ArrayList<String>> iterator = matchLinks.iterator();
        while (iterator.hasNext()) {
            ArrayList<String> matchList = iterator.next();
            // The last list is treated as the results table.
            boolean hasResult = !iterator.hasNext();
            if (hasResult && !doResults) {
                continue;
            }
            for (String matchUrl : matchList) {
                seriesItemFutures.add(executorService.submit(new MatchGetter(matchUrl, hasResult)));
                numSubmitted++;
            }
        }
        executorService.shutdown();
        executorService.awaitTermination(20L, TimeUnit.SECONDS);
        LogUtils.LOGD(TAG, "Stopping Retrieval, elements submitted for fetching: " + numSubmitted);

        // Collect into lists so that fetches which produced no item do not leave null
        // slots in the arrays handed to bulkInsert.
        ArrayList<ContentValues> seriesEntries = new ArrayList<ContentValues>(numSubmitted);
        ArrayList<ContentValues> resultEntries = new ArrayList<ContentValues>();

        for (Future<BundledMatchItem> seriesItemFuture : seriesItemFutures) {
            try {
                BundledMatchItem seriesItem = seriesItemFuture.get();
                if (seriesItem != null) {
                    seriesEntries.add(seriesItem.mMatch);
                    if (seriesItem.hasResult) {
                        resultEntries.add(seriesItem.mResult);
                    }
                }
            } catch (ExecutionException e) {
                Log.e(TAG, "Should never get here", e);
            }
        }

        this.getContentResolver().bulkInsert(MatchContract.SeriesEntry.CONTENT_URI,
                seriesEntries.toArray(new ContentValues[seriesEntries.size()]));

        if (doResults) {
            this.getContentResolver().bulkInsert(MatchContract.ResultEntry.CONTENT_URI,
                    resultEntries.toArray(new ContentValues[resultEntries.size()]));
        }

        PrefUtils.setLastUpdateTime(this, TimeUtils.getUTCTime());

    } catch (IOException e) {
        Log.e(TAG, e.getMessage(), e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can observe it.
        Thread.currentThread().interrupt();
        Log.e(TAG, "Interrupted while updating matches", e);
    }

    LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(UPDATE_COMPLETE));

    // NOTE(review): this timestamp is written even when doResults is false or the update
    // failed — confirm that is intended.
    PrefUtils.setLastResultsUpdateTime(this, TimeUtils.getUTCTime());
}

From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java

/**
 * Grab contents from the forum that the user clicked on.
 *
 * <p>Updates the pagination fields from the page navigation bar, then walks every row
 * of the thread listing table and appends one {@code ThreadModel} per announcement or
 * regular thread to {@code threadlist}. A row that cannot be parsed is logged and skipped.
 *
 * @param doc      The document parsed from the link
 * @param id      The id number of the link
 * @param isMarket    True if the link is from a marketplace category
 */
public void getCategoryContents(Document doc, String id, boolean isMarket) {

    // Update pagination; the fields keep their previous values when the navigation
    // bar is missing or has fewer words than expected.
    Element pageNav = doc.select("div[class=pagenav]").first();
    if (pageNav != null) {
        String[] pageParts = pageNav.select("td[class^=vbmenu_control]").text().split(" ");
        if (pageParts.length > 1) {
            thisPage = pageParts[1];
        }
        if (pageParts.length > 3) {
            finalPage = pageParts[3];
        }
    }

    // Make sure id contains only numbers
    if (!isNewTopicActivity) {
        id = Utils.parseInts(id);
    }

    // Grab each thread
    Elements threadListing = doc.select("table[id=threadslist] > tbody > tr");

    for (Element thread : threadListing) {
        try {
            boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false,
                    isPoll = false;
            String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "",
                    lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = "";

            Elements announcementContainer = thread.select("td[colspan=5]");
            Elements threadTitleContainer = thread.select("a[id^=thread_title]");

            // We could have two different types of threads.  Announcement threads are
            // completely different than the other types of threads (sticky, locked, etc)
            // so we need to play some games here
            if (!announcementContainer.isEmpty()) {
                Log.d(TAG, "Announcement Thread Found");

                Elements annThread = announcementContainer.select("div > a");
                Elements annUser = announcementContainer.select("div > span[class=smallfont]");
                formattedTitle = "Announcement: " + annThread.first().text();
                threadUser = annUser.last().text();
                threadLink = annThread.attr("href");
                isAnnounce = true;
            } else if (!threadTitleContainer.isEmpty()) {
                Element threadLinkEl = threadTitleContainer.first();
                Element repliesText = thread.select("td[title^=Replies]").first();
                Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first();
                Element threadicon = thread.select("img[id^=thread_statusicon_]").first();
                Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first();
                Element threadDateFull = thread.select("td[title^=Replies:] > div").first();

                // Optional decorations: each keeps its default when the element is absent.
                String preString = "";
                if (threadDiv != null) {
                    String threadDivText = threadDiv.text();
                    isSticky = threadDivText.contains("Sticky:");
                    isPoll = threadDivText.contains("Poll:");
                    preString = threadDiv.select("span > b").text();
                    hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty();

                    // Find the last page if it exists
                    Element lastSpan = threadDiv.select("span").last();
                    Element lastPageLink = lastSpan == null ? null : lastSpan.select("a").last();
                    if (lastPageLink != null) {
                        lastPage = lastPageLink.attr("href");
                    }
                }
                if (threadicon != null) {
                    String icSt = threadicon.attr("src");
                    isLocked = icSt.contains("lock") && icSt.endsWith(".gif");
                }

                // The elements below are mandatory: a row lacking one of them fails here
                // and is skipped by the enclosing catch.
                threadDate = threadDateFull.text();
                int findAMPM = threadDate.indexOf("M") + 1;
                threadDate = threadDate.substring(0, findAMPM);

                String totalPostsInThreadTitle = threadicon.attr("alt");
                if (totalPostsInThreadTitle != null && totalPostsInThreadTitle.length() > 0) {
                    totalPosts = totalPostsInThreadTitle.split(" ")[2];
                }

                // Remove page from the link
                String realLink = Utils.removePageFromLink(link);

                if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) {

                    String txt = repliesText.getElementsByClass("alt2").attr("title");
                    String splitter[] = txt.split(" ", 4);

                    postCount = splitter[1].substring(0, splitter[1].length() - 1);
                    views = splitter[3];

                    if (this.isNewTopicActivity) {
                        Element forumCell = thread.select("td[class=alt1]").last();
                        if (forumCell != null) {
                            forum = forumCell.text();
                        }
                    }

                    formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "",
                            preString.length() == 0 ? "" : preString + " ", threadLinkEl.text());
                }

                threadUser = threaduser.text();
                lastUser = repliesText.select("a[href*=members]").text();
                threadLink = threadLinkEl.attr("href");
            }

            // Add our thread to our list as long as the thread
            // contains a title
            if (!formattedTitle.equals("")) {
                ThreadModel tv = new ThreadModel();
                tv.setTitle(formattedTitle);
                tv.setStartUser(threadUser);
                tv.setLastUser(lastUser);
                tv.setLink(threadLink);
                tv.setLastLink(lastPage);
                tv.setPostCount(postCount);
                tv.setMyPosts(totalPosts);
                tv.setViewCount(views);
                tv.setLocked(isLocked);
                tv.setSticky(isSticky);
                tv.setAnnouncement(isAnnounce);
                tv.setPoll(isPoll);
                tv.setHasAttachment(hasAttachment);
                tv.setForum(forum);
                tv.setLastPostTime(threadDate);
                threadlist.add(tv);
            } else if (thread.text()
                    .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) {
                Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size()));
                if (threadlist.size() > 0) {
                    ThreadModel ltv = threadlist.get(threadlist.size() - 1);
                    Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle()));
                }

                if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext())) {
                    threadlist.add(new ThreadModel(true));
                } else {
                    Log.d(TAG, "User Chose To Hide Old Threads");
                    break;
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "Error Parsing That Thread...", e);
            Log.d(TAG, "Thread may have moved");
        }
    }
}

From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageInboxFragment.java

/**
 * Construct view by grabbing all private messages.  This is only done
 * if the view is called for the first time.  If there was a savedinstance
 * of the view then this is not called/*from   w w  w.  jav  a  2s .  co m*/
 */
private void constructView() {
    this.showOutbound = getArguments().getBoolean(showOutboundExtra, false);

    AsyncTask<Void, String, Void> updaterTask = new AsyncTask<Void, String, Void>() {
        @Override
        protected void onPreExecute() {

            loadingDialog = ProgressDialog.show(getActivity(), getString(R.string.loading),
                    getString(R.string.pleaseWait), true);
        }

        @Override
        protected Void doInBackground(Void... params) {
            Document doc = VBForumFactory.getInstance().get(getActivity(),
                    showOutbound ? WebUrls.pmSentUrl : WebUrls.pmInboxUrl);

            if (doc != null) {
                token = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken");
                String current_month = getMonthForInt(0);
                Elements collapse = doc
                        .select(showOutbound ? "tbody[id^=collapseobj_pmf-1]" : "tbody[id^=collapseobj_pmf0]");

                publishProgress(getString(R.string.asyncDialogGrabPMs));
                for (Element coll : collapse) {
                    Elements trs = coll.select("tr");
                    for (Element tr : trs) {
                        Elements alt1s = tr.getElementsByClass("alt1Active");
                        for (Element alt1 : alt1s) {

                            Elements divs = alt1.select("div");

                            // First grab our link
                            Elements linkElement = divs.get(0).select("a[rel=nofollow]");
                            String pmLink = linkElement.attr("href");

                            // There should be two divs here with text in it
                            // the first is 'MM-DD-YYYY Subject'
                            String dateSubject = divs.get(0).text();
                            String[] dateSubjectSplit = dateSubject.split(" ", 2);

                            // The second is HH:MM AMPM User
                            String timeTimeUser = divs.get(1).text();
                            String[] timeTimeUserSplit = timeTimeUser.split(" ", 3);

                            // Create new pm
                            PMModel pm = new PMModel();
                            pm.setDate(dateSubjectSplit[0]);

                            // Check the month before we go further
                            String this_month = getMonthForInt(Integer.parseInt(pm.getDate().split("-")[0]));
                            if (!current_month.equals(this_month)) {
                                current_month = this_month;
                                PMModel pm_m = new PMModel();
                                pm_m.setTitle(String.format("%s - %s", this_month,
                                        showOutbound ? getResources().getString(R.string.inboxSent)
                                                : getResources().getString(R.string.inboxInbox)));
                                pmlist.add(pm_m);
                            }

                            pm.setTime(timeTimeUserSplit[0] + timeTimeUserSplit[1]);
                            pm.setTitle(dateSubjectSplit[1]);
                            pm.setUser(timeTimeUserSplit[2]);
                            pm.setLink(pmLink);
                            pm.setToken(token);

                            Log.v(TAG, "Adding PM From: " + pm.getUser());
                            pmlist.add(pm);
                        }
                    }
                }
                updateList();
            }
            return null;
        }

        @Override
        protected void onProgressUpdate(String... progress) {
            if (loadingDialog != null)
                loadingDialog.setMessage(progress[0]);
        }

        @Override
        protected void onPostExecute(Void result) {
            loadingDialog.dismiss();
        }
    };
    updaterTask.execute();
}

From source file:im.ene.lab.attiq.ui.activities.ItemDetailActivity.java

/**
 * Renders the comments carried by the event into the comments web view.
 * <p>
 * When the event has no comments, the counters are reset to zero and the web view is hidden.
 * Otherwise every comment is rendered from the {@code html/comment.html} asset template, appended
 * to the {@code content} element of {@code html/comments.html}, and the resulting page is loaded
 * into the web view.
 *
 * @param event the event holding the comments to display
 */
@SuppressWarnings("unused")
public void onEventMainThread(ItemCommentsEvent event) {
    if (UIUtil.isEmpty(event.comments)) {
        mCommentCount.setText("0");
        mCommentInfo.setText(getString(R.string.article_comment, 0, getString(R.string.comment_plural)));
        mCommentsView.setVisibility(View.GONE);
        return;
    }

    mCommentsView.setVisibility(View.VISIBLE);
    List<Comment> comments = event.comments;

    mCommentCount.setText(String.valueOf(comments.size()));

    String info = comments.size() == 1 ? getString(R.string.comment_singular)
            : getString(R.string.comment_plural);
    // FIXME should use plural strings
    mCommentInfo.setText(getString(R.string.article_comment, comments.size(), info));

    try {
        Document fullBody = Jsoup.parse(IOUtil.readAssets("html/comments.html"));
        Element content = fullBody.getElementById("content");

        // The per-comment template does not depend on the comment, so read the asset only once.
        final String commentTemplate = IOUtil.readAssets("html/comment.html");

        for (Comment comment : comments) {
            String commentHtml = commentTemplate
                    .replace("{user_icon_url}", comment.getUser().getProfileImageUrl())
                    .replace("{user_name}", comment.getUser().getId())
                    .replace("{comment_time}", TimeUtil.commentTime(comment.getCreatedAt()))
                    .replace("{article_uuid}", mItemUuid).replace("{comment_id}", comment.getId());

            Document commentDoc = Jsoup.parse(commentHtml);
            Element eComment = commentDoc.getElementsByClass("comment-box").first();
            if (eComment == null) {
                // first() returns null when nothing matches; without a comment box there is
                // nothing to append for this comment.
                continue;
            }

            Element message = eComment.getElementsByClass("message").first();
            if (message != null) {
                message.append(comment.getRenderedBody());
            }

            // remove comment edit block if it is not from current user
            if (mMyProfile == null || !mMyProfile.getId().equals(comment.getUser().getId())) {
                String commentId = "comment_{comment_id}_{user_name}"
                        .replace("{comment_id}", comment.getId())
                        .replace("{user_name}", comment.getUser().getId());
                Element commentEditor = commentDoc.getElementById(commentId);
                // getElementById returns null when the template has no such element.
                if (commentEditor != null) {
                    commentEditor.remove();
                }
            }

            content.appendChild(eComment);
        }

        String result = fullBody.outerHtml();
        mCommentsView.loadDataWithBaseURL("http://qiita.com/", result, null, null, null);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:im.ene.lab.attiq.ui.activities.ProfileActivity.java

/**
 * Extracts the "Contribution" figure from the user activity chart of the received document and,
 * when it can be parsed, stores it in the current state and posts a {@code StateEvent}.
 * <p>
 * Nothing happens when the document is missing, the stats block is absent, no stat is labelled
 * "Contribution", or its count is not a valid integer.
 *
 * @param event the event carrying the parsed profile document
 */
@SuppressWarnings("unused")
public void onEventMainThread(DocumentEvent event) {
    if (event.document == null) {
        return;
    }

    Elements stats = event.document.getElementsByClass("userActivityChart_stats");
    if (UIUtil.isEmpty(stats)) {
        return;
    }

    Element statBlock = stats.first();
    if (statBlock == null) {
        return;
    }

    Integer contribution = null;
    for (Element stat : statBlock.children()) {
        String unit = stat.getElementsByClass("userActivityChart_statUnit").text();
        if (!"Contribution".equals(unit.trim())) {
            continue;
        }

        try {
            contribution = Integer.valueOf(stat.getElementsByClass("userActivityChart_statCount").text());
        } catch (NumberFormatException er) {
            er.printStackTrace();
        }

        // Only the first stat labelled "Contribution" is considered, parsed or not.
        break;
    }

    if (contribution == null) {
        return;
    }

    ((State) mState).contribution = contribution;
    EventBus.getDefault().post(new StateEvent<>(getClass().getSimpleName(), true, null, mState));
}

From source file:org.keionline.keionline.ArticleView.java

/**
 * Downloads the page at {@code url} and returns the HTML of its first element with class
 * {@code node}, prefixed with {@code CSS} and followed by a closing {@code </body>} tag.
 * <p>
 * Image sources and link targets inside that element are rewritten to absolute URLs, and a
 * horizontal rule is inserted after the first element with class {@code submitted} when one
 * exists. The result is also stored in the {@code content} field.
 *
 * @param url the address of the article page
 * @return the assembled HTML fragment
 * @throws IOException if the page cannot be fetched or contains no element with class {@code node}
 */
private String getContent(String url) throws IOException {
    Document doc = Jsoup.connect(url).userAgent("Mozilla").get();

    // first() returns null when nothing matches; report that as a failed load instead of
    // letting a NullPointerException escape.
    Element data = doc.getElementsByClass("node").first();
    if (data == null) {
        throw new IOException("No element with class \"node\" found at " + url);
    }

    // Change the links to absolute!! so that images work
    for (Element image : data.select("img")) {
        image.attr("src", image.absUrl("src"));
    }
    for (Element link : data.select("a")) {
        link.attr("href", link.absUrl("href"));
    }

    // The separator is purely decorative, so skip it when the page has no "submitted" element.
    Element info = data.getElementsByClass("submitted").first();
    if (info != null) {
        info.after("<hr>");
    }

    String cont = CSS + data.toString() + "</body>";
    content = cont;
    return cont;
}

From source file:org.loklak.api.search.WordpressCrawlerService.java

/**
 * Crawls a WordPress blog page and collects one JSON object per {@code <article>} element.
 * <p>
 * Each object holds {@code blog_url}, {@code title} (class {@code entry-title}), {@code posted_on}
 * (class {@code posted-on}), {@code author} (class {@code byline}) and {@code content} (class
 * {@code entry-content}). When an article has several elements of one class, the text of the last
 * one is used; when it has none, a null value is passed to {@code put} for that key.
 * <p>
 * If the page cannot be fetched, the stack trace is printed and the returned thought carries an
 * empty array.
 *
 * @param blogURL the address of the blog page to crawl
 * @return a {@code SusiThought} whose data is the array of collected posts
 */
public static SusiThought crawlWordpress(String blogURL) {
    JSONArray blog = new JSONArray();

    Document blogHTML = null;
    try {
        blogHTML = Jsoup.connect(blogURL).get();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // A failed download leaves blogHTML null; return the empty result instead of dereferencing it.
    if (blogHTML != null) {
        // Build the JSON objects directly so the number of articles is not capped by a fixed buffer.
        for (Element article : blogHTML.getElementsByTag("article")) {
            JSONObject blogpost = new JSONObject();
            blogpost.put("blog_url", blogURL);
            blogpost.put("title", lastTextByClass(article, "entry-title"));
            blogpost.put("posted_on", lastTextByClass(article, "posted-on"));
            blogpost.put("author", lastTextByClass(article, "byline"));
            blogpost.put("content", lastTextByClass(article, "entry-content"));
            blog.put(blogpost);
        }
    }

    SusiThought json = new SusiThought();
    json.setData(blog);
    return json;

}

/** Returns the text of the last element with the given class inside {@code parent}, or null if none. */
private static String lastTextByClass(Element parent, String className) {
    String text = null;
    for (Element match : parent.getElementsByClass(className)) {
        text = match.text();
    }
    return text;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

/**
 * Scrapes IMDb metadata for the movie described by the given options.
 * <p>
 * If the search result inside {@code options} already carries metadata, that object is returned
 * unchanged. Otherwise the IMDb id is taken from the search result or, failing that, from the scrape
 * options; when neither is valid, the freshly created, unpopulated metadata object is returned.
 * <p>
 * With a valid id, requests for the "/combined" page and the "/plotsummary" page are submitted to the
 * executor, plus a TMDb lookup when foreign-language scraping or collection info is requested. The
 * combined page is parsed for title, year, original title, poster, rating, vote count, top 250
 * position, the "info" blocks (release date, tagline, genres, runtime, country, spoken languages,
 * certification, director), cast, writers, producers and production companies. The plot comes from
 * the plot summary page, and selected values are finally overwritten with TMDb data when requested.
 *
 * @param options the scrape options (search result, IMDb id, language, country and feature flags)
 * @return the collected metadata
 * @throws Exception anything thrown while waiting for the worker futures or while parsing
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());
    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult());
        return options.getResult().getMetadata();
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    // no usable id from either source: return the unpopulated metadata
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId);
    md.setId(MediaMetadata.IMDBID, imdbId);

    // both completion services wrap the same executor; the futures are awaited further down
    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>(
            executor);

    // worker for imdb request (/combined) (everytime from akas.imdb.com)
    // StringBuilder sb = new StringBuilder(imdbSite.getSite());
    StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/combined");
    Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(),
            options.getCountry().getAlpha2());
    Future<Document> futureCombined = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    Future<Document> futurePlotsummary = null;
    sb = new StringBuilder(imdbSite.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/plotsummary");

    worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2());
    futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request
    Future<MediaMetadata> futureTmdb = null;
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    // blocks until the /combined page is available
    Document doc;
    doc = futureCombined.get();

    /*
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // parse title and year
    Element title = doc.getElementById("tn15title");
    if (title != null) {
        Element element = null;
        // title
        Elements elements = title.getElementsByTag("h1");
        if (elements.size() > 0) {
            element = elements.first();
            String movieTitle = cleanString(element.ownText());
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // year
        elements = title.getElementsByTag("span");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();

            // search year
            // the alternation matches either "(" followed by four digits (captured as group 1) or a
            // literal "/)"; group 1 is null for the second alternative, hence the null check below
            Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
            Matcher matcher = yearPattern.matcher(content);
            while (matcher.find()) {
                if (matcher.group(1) != null) {
                    String movieYear = matcher.group(1);
                    md.storeMetadata(MediaMetadata.YEAR, movieYear);
                    break;
                }
            }
        }

        // original title
        elements = title.getElementsByAttributeValue("class", "title-extra");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();
            content = content.replaceAll("\\(original title\\)", "").trim();
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content);
        }
    }

    // poster
    Element poster = doc.getElementById("primary-poster");
    if (poster != null) {
        String posterUrl = poster.attr("src");
        // rewrite the SX../SY.. tokens in the image URL to 400
        posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
        posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
        processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementById("tn15rating");
    if (ratingElement != null) {
        Elements elements = ratingElement.getElementsByClass("starbar-meta");
        if (elements.size() > 0) {
            Element div = elements.get(0);

            // rating comes in <b> tag
            Elements b = div.getElementsByTag("b");
            if (b.size() == 1) {
                String ratingAsString = b.text();
                Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10");
                Matcher matcher = ratingPattern.matcher(ratingAsString);
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        float rating = 0;
                        try {
                            rating = Float.valueOf(matcher.group(1));
                        } catch (Exception e) {
                            // ignored: the rating stays 0 when the captured text cannot be parsed
                        }
                        md.storeMetadata(MediaMetadata.RATING, rating);
                        break;
                    }
                }
            }

            // count
            Elements a = div.getElementsByAttributeValue("href", "ratings");
            if (a.size() == 1) {
                // strip thousands separators and the word "votes" before parsing
                String countAsString = a.text().replaceAll("[.,]|votes", "").trim();
                int voteCount = 0;
                try {
                    voteCount = Integer.parseInt(countAsString);
                } catch (Exception e) {
                    // ignored: the vote count stays 0 when the text cannot be parsed
                }
                md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount);
            }
        }

        // top250
        elements = ratingElement.getElementsByClass("starbar-special");
        if (elements.size() > 0) {
            Elements a = elements.get(0).getElementsByTag("a");
            if (a.size() > 0) {
                Element anchor = a.get(0);
                Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})");
                Matcher matcher = topPattern.matcher(anchor.ownText());
                // unlike the loops above there is no break here, so every match is stored in turn
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        int top250 = 0;
                        try {
                            top250 = Integer.parseInt(matcher.group(1));
                        } catch (Exception e) {
                            // ignored: top250 stays 0 when the text cannot be parsed
                        }
                        md.storeMetadata(MediaMetadata.TOP_250, top250);
                    }
                }
            }
        }
    }

    // parse all items coming by <div class="info">
    Elements elements = doc.getElementsByClass("info");
    for (Element element : elements) {
        // only parse divs
        if (!"div".equals(element.tag().getName())) {
            continue;
        }

        // elements with h5 are the titles of the values
        Elements h5 = element.getElementsByTag("h5");
        if (h5.size() > 0) {
            Element firstH5 = h5.first();
            String h5Title = firstH5.text();

            // release date
            /*
             * <div class="info"><h5>Release Date:</h5><div class="info-content">5 January 1996 (USA)<a class="tn15more inline"
             * href="/title/tt0114746/releaseinfo"
             * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a>&nbsp;</div></div>
             */
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element releaseDateElement = div.first();
                    // NOTE(review): the replaceAll pattern appears to be empty here, which would make the
                    // call a no-op; presumably it once held a special character (e.g. a non-breaking
                    // space) - confirm against the upstream source
                    String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", ""));
                    // group 1 is everything before the trailing "(...)" part
                    Pattern pattern = Pattern.compile("(.*)\\(.*\\)");
                    Matcher matcher = pattern.matcher(releaseDate);
                    if (matcher.find()) {
                        try {
                            SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy");
                            Date parsedDate = sdf.parse(matcher.group(1));
                            sdf = new SimpleDateFormat("dd-MM-yyyy");
                            md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate));
                        } catch (Exception e) {
                            // ignored: no release date is stored when the text cannot be parsed
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline"
             * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See
             * more</a>&nbsp;&raquo; </div></div>
             */
            // tagline
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*")
                    && !options.isScrapeImdbForeignLanguage()) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    // NOTE(review): same apparently empty replaceAll pattern as for the release date - confirm
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.storeMetadata(MediaMetadata.TAGLINE, tagline);
                }
            }

            /*
             * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a
             * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a
             * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a
             * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" href="/title/tt0472033/keywords" onClick=
             * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a>&nbsp;&raquo; </div>
             */
            // genres are only scraped from akas.imdb.com
            // NOTE(review): the heading is matched against imdbSite.getGenre(), not IMDB_COM - confirm intent
            if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Elements a = div.first().getElementsByTag("a");
                    for (Element anchor : a) {
                        if (anchor.attr("href").matches("/Sections/Genres/.*")) {
                            md.addGenre(getTmmGenre(anchor.ownText()));
                        }
                    }
                }
            }
            // }

            /*
             * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div>
             */
            // runtime
            // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    // only the first of the "|"-separated runtimes is used
                    String first = taglineElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.storeMetadata(MediaMetadata.RUNTIME, runtime);
                }
            }

            /*
             * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a
             * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div>
             */
            // country
            if (h5Title.matches("(?i)Country.*")) {
                Elements a = element.getElementsByTag("a");
                // builds a ", "-separated list of upper-cased codes taken from the /country/ links
                String countries = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/country/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String country = matcher.group(1);
                        if (StringUtils.isNotEmpty(countries)) {
                            countries += ", ";
                        }
                        countries += country.toUpperCase();
                    }
                }
                md.storeMetadata(MediaMetadata.COUNTRY, countries);
            }

            /*
             * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a
             * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div>
             */
            // Spoken languages
            if (h5Title.matches("(?i)Language.*")) {
                Elements a = element.getElementsByTag("a");
                // builds a ", "-separated list of the codes taken from the /language/ links
                String spokenLanguages = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/language/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String langu = matcher.group(1);
                        if (StringUtils.isNotEmpty(spokenLanguages)) {
                            spokenLanguages += ", ";
                        }
                        spokenLanguages += langu;
                    }
                }
                md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages);
            }

            /*
             * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate
             * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a
             * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a
             * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div>
             */
            // certification
            // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) {
                Elements a = element.getElementsByTag("a");
                for (Element anchor : a) {
                    // certification for the right country
                    if (anchor.attr("href").matches(
                            "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) {
                        // group 1 is the text after the last ":" of the anchor text
                        Pattern certificationPattern = Pattern.compile(".*:(.*)");
                        Matcher matcher = certificationPattern.matcher(anchor.ownText());
                        Certification certification = null;
                        while (matcher.find()) {
                            if (matcher.group(1) != null) {
                                certification = Certification.getCertification(options.getCountry(),
                                        matcher.group(1));
                            }
                        }

                        // stop at the first anchor that yields a certification
                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }
            }
        }

        /*
         * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick=
         * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div>
         */
        // director
        if ("director-info".equals(element.id())) {
            Elements a = element.getElementsByTag("a");
            for (Element anchor : a) {
                if (anchor.attr("href").matches("/name/nm.*")) {
                    MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                    cm.setName(anchor.ownText());
                    md.addCastMember(cm);
                }
            }
        }
    }

    /*
     * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick=
     * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src=
     * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a
     * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td
     * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a
     * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick=
     * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick=
     * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick=
     * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table>
     */
    // cast
    elements = doc.getElementsByClass("cast");
    if (elements.size() > 0) {
        Elements tr = elements.get(0).getElementsByTag("tr");
        for (Element row : tr) {
            Elements td = row.getElementsByTag("td");
            MediaCastMember cm = new MediaCastMember();
            for (Element column : td) {
                // actor thumb
                if (column.hasClass("hs")) {
                    Elements img = column.getElementsByTag("img");
                    if (img.size() > 0) {
                        String thumbUrl = img.get(0).attr("src");
                        if (thumbUrl.contains("no_photo.png")) {
                            cm.setImageUrl("");
                        } else {
                            // set the SX.. token to 400 and drop the SY.. token from the URL
                            thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                            thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", "");
                            cm.setImageUrl(thumbUrl);
                        }
                    }
                }
                // actor name
                if (column.hasClass("nm")) {
                    cm.setName(cleanString(column.text()));
                }
                // character
                if (column.hasClass("char")) {
                    cm.setCharacter(cleanString(column.text()));
                }
            }
            // only rows providing both a name and a character are added as actors
            if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    Element content = doc.getElementById("tn15content");
    if (content != null) {
        elements = content.getElementsByTag("table");
        for (Element table : elements) {
            // writers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) {
                Elements anchors = table.getElementsByTag("a");
                for (Element anchor : anchors) {
                    if (anchor.attr("href").matches("/name/nm.*")) {
                        MediaCastMember cm = new MediaCastMember(CastType.WRITER);
                        cm.setName(anchor.ownText());
                        md.addCastMember(cm);
                    }
                }
            }

            // producers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                Elements rows = table.getElementsByTag("tr");
                for (Element row : rows) {
                    // skip the row that contains the producers heading text itself
                    if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                        continue;
                    }
                    Elements columns = row.children();
                    if (columns.size() == 0) {
                        continue;
                    }
                    MediaCastMember cm = new MediaCastMember(CastType.PRODUCER);
                    String name = cleanString(columns.get(0).text());
                    if (StringUtils.isBlank(name)) {
                        continue;
                    }
                    cm.setName(name);
                    // the third column, when present, is stored as the part
                    if (columns.size() >= 3) {
                        cm.setPart(cleanString(columns.get(2).text()));
                    }
                    md.addCastMember(cm);
                }
            }
        }
    }

    // Production companies
    elements = doc.getElementsByClass("blackcatheader");
    for (Element blackcatheader : elements) {
        if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) {
            // the anchors are read from the element directly following the header
            Elements a = blackcatheader.nextElementSibling().getElementsByTag("a");
            StringBuilder productionCompanies = new StringBuilder();
            for (Element anchor : a) {
                if (StringUtils.isNotEmpty(productionCompanies)) {
                    productionCompanies.append(", ");
                }
                productionCompanies.append(anchor.ownText());
            }
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString());
            break;
        }
    }

    /*
     * plot from /plotsummary
     */
    // build the url
    // from here on doc refers to the /plotsummary page
    doc = null;
    doc = futurePlotsummary.get();

    // imdb.com has another site structure
    if (imdbSite == ImdbSiteDefinition.IMDB_COM) {
        Elements zebraList = doc.getElementsByClass("zebraList");
        if (zebraList != null && !zebraList.isEmpty()) {
            Elements odd = zebraList.get(0).getElementsByClass("odd");
            if (odd.isEmpty()) {
                odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even
            }
            if (odd.size() > 0) {
                Elements p = odd.get(0).getElementsByTag("p");
                if (p.size() > 0) {
                    String plot = cleanString(p.get(0).ownText());
                    md.storeMetadata(MediaMetadata.PLOT, plot);
                }
            }
        }
    } else {
        Element wiki = doc.getElementById("swiki.2.1");
        if (wiki != null) {
            String plot = cleanString(wiki.ownText());
            md.storeMetadata(MediaMetadata.PLOT, plot);
        }
    }

    // title also from chosen site if we are not scraping akas.imdb.com
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        title = doc.getElementById("tn15title");
        if (title != null) {
            Element element = null;
            // title
            elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                element = elements.first();
                String movieTitle = cleanString(element.ownText());
                md.storeMetadata(MediaMetadata.TITLE, movieTitle);
            }
        }
    }
    // }

    // get data from tmdb?
    // same condition as the one guarding the TMDb submit above, so futureTmdb is non-null here
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        MediaMetadata tmdbMd = futureTmdb.get();
        // the IMDb values are only overwritten when TMDb delivered a non-blank plot
        if (options.isScrapeImdbForeignLanguage() && tmdbMd != null
                && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) {
            // tmdbid
            md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID));
            // title
            md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE));
            // original title
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE));
            // tagline
            md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE));
            // plot
            md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT));
            // collection info
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
        }
        if (options.isScrapeCollectionInfo() && tmdbMd != null) {
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

@Override
public List<MediaSearchResult> search(MediaSearchOptions query) throws Exception {
    LOGGER.debug("search() " + query.toString());
    /*/*from  w w w  .j av  a2 s.c om*/
     * IMDb matches seem to come in several "flavours".
     * 
     * Firstly, if there is one exact match it returns the matching IMDb page.
     * 
     * If that fails to produce a unique hit then a list of possible matches are returned categorised as: Popular Titles (Displaying ? Results) Titles
     * (Exact Matches) (Displaying ? Results) Titles (Partial Matches) (Displaying ? Results)
     * 
     * We should check the Exact match section first, then the poplar titles and finally the partial matches.
     * 
     * Note: That even with exact matches there can be more than 1 hit, for example "Star Trek"
     */

    Pattern imdbIdPattern = Pattern.compile("/title/(tt[0-9]{7})/");

    List<MediaSearchResult> result = new ArrayList<MediaSearchResult>();

    String searchTerm = "";

    if (StringUtils.isNotEmpty(query.get(SearchParam.IMDBID))) {
        searchTerm = query.get(SearchParam.IMDBID);
    }

    if (StringUtils.isEmpty(searchTerm)) {
        searchTerm = query.get(SearchParam.QUERY);
    }

    if (StringUtils.isEmpty(searchTerm)) {
        searchTerm = query.get(SearchParam.TITLE);
    }

    if (StringUtils.isEmpty(searchTerm)) {
        return result;
    }

    // parse out language and coutry from the scraper options
    String language = query.get(SearchParam.LANGUAGE);
    String myear = query.get(SearchParam.YEAR);
    String country = query.get(SearchParam.COUNTRY); // for passing the country to the scrape

    searchTerm = MetadataUtil.removeNonSearchCharacters(searchTerm);

    StringBuilder sb = new StringBuilder(imdbSite.getSite());
    sb.append("find?q=");
    try {
        // search site was everytime in UTF-8
        sb.append(URLEncoder.encode(searchTerm, "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        // Failed to encode the movie name for some reason!
        LOGGER.debug("Failed to encode search term: " + searchTerm);
        sb.append(searchTerm);
    }

    // we need to search for all - otherwise we do not find TV movies
    sb.append(CAT_TITLE);

    LOGGER.debug("========= BEGIN IMDB Scraper Search for: " + sb.toString());
    Document doc;
    try {
        CachedUrl url = new CachedUrl(sb.toString());
        url.addHeader("Accept-Language", getAcceptLanguage(language, country));
        doc = Jsoup.parse(url.getInputStream(), "UTF-8", "");
    } catch (Exception e) {
        LOGGER.debug("tried to fetch search response", e);

        // clear Cache
        CachedUrl.removeCachedFileForUrl(sb.toString());

        return result;
    }

    // check if it was directly redirected to the site
    Elements elements = doc.getElementsByAttributeValue("rel", "canonical");
    for (Element element : elements) {
        MediaMetadata md = null;
        // we have been redirected to the movie site
        String movieName = null;
        String movieId = null;

        String href = element.attr("href");
        Matcher matcher = imdbIdPattern.matcher(href);
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                movieId = matcher.group(1);
            }
        }

        // get full information
        if (!StringUtils.isEmpty(movieId)) {
            MediaScrapeOptions options = new MediaScrapeOptions();
            options.setImdbId(movieId);
            options.setLanguage(MediaLanguages.valueOf(language));
            options.setCountry(CountryCode.valueOf(country));
            options.setScrapeCollectionInfo(Boolean.parseBoolean(query.get(SearchParam.COLLECTION_INFO)));
            options.setScrapeImdbForeignLanguage(
                    Boolean.parseBoolean(query.get(SearchParam.IMDB_FOREIGN_LANGUAGE)));
            md = getMetadata(options);
            if (!StringUtils.isEmpty(md.getStringValue(MediaMetadata.TITLE))) {
                movieName = md.getStringValue(MediaMetadata.TITLE);
            }
        }

        // if a movie name/id was found - return it
        if (StringUtils.isNotEmpty(movieName) && StringUtils.isNotEmpty(movieId)) {
            MediaSearchResult sr = new MediaSearchResult(providerInfo.getId());
            sr.setTitle(movieName);
            sr.setIMDBId(movieId);
            sr.setYear(md.getStringValue(MediaMetadata.YEAR));
            sr.setMetadata(md);
            sr.setScore(1);

            // and parse out the poster
            String posterUrl = "";
            Element td = doc.getElementById("img_primary");
            if (td != null) {
                Elements imgs = td.getElementsByTag("img");
                for (Element img : imgs) {
                    posterUrl = img.attr("src");
                    posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                    posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
                    posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");
                }
            }
            if (StringUtils.isNotBlank(posterUrl)) {
                sr.setPosterUrl(posterUrl);
            }

            result.add(sr);
            return result;
        }
    }

    // parse results
    // elements = doc.getElementsByClass("result_text");
    elements = doc.getElementsByClass("findResult");
    for (Element tr : elements) {
        // we only want the tr's
        if (!"tr".equalsIgnoreCase(tr.tagName())) {
            continue;
        }

        // find the id / name
        String movieName = "";
        String movieId = "";
        String year = "";
        Elements tds = tr.getElementsByClass("result_text");
        for (Element element : tds) {
            // we only want the td's
            if (!"td".equalsIgnoreCase(element.tagName())) {
                continue;
            }

            // filter out unwanted results
            Pattern unwanted = Pattern.compile(".*\\((TV Series|TV Episode|Short|Video Game)\\).*"); // stripped out .*\\(Video\\).*|
            Matcher matcher = unwanted.matcher(element.text());
            if (matcher.find()) {
                continue;
            }

            // is there a localized name? (aka)
            String localizedName = "";
            Elements italics = element.getElementsByTag("i");
            if (italics.size() > 0) {
                localizedName = italics.text().replace("\"", "");
            }

            // get the name inside the link
            Elements anchors = element.getElementsByTag("a");
            for (Element a : anchors) {
                if (StringUtils.isNotEmpty(a.text())) {
                    // movie name
                    if (StringUtils.isNotBlank(localizedName) && !language.equals("en")) {
                        // take AKA as title, but only if not EN
                        movieName = localizedName;
                    } else {
                        movieName = a.text();
                    }

                    // parse id
                    String href = a.attr("href");
                    matcher = imdbIdPattern.matcher(href);
                    while (matcher.find()) {
                        if (matcher.group(1) != null) {
                            movieId = matcher.group(1);
                        }
                    }

                    // try to parse out the year
                    Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
                    matcher = yearPattern.matcher(element.text());
                    while (matcher.find()) {
                        if (matcher.group(1) != null) {
                            year = matcher.group(1);
                            break;
                        }
                    }
                    break;
                }
            }
        }

        // if an id/name was found - parse the poster image
        String posterUrl = "";
        tds = tr.getElementsByClass("primary_photo");
        for (Element element : tds) {
            Elements imgs = element.getElementsByTag("img");
            for (Element img : imgs) {
                posterUrl = img.attr("src");
                posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
                posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");
            }
        }

        // if no movie name/id was found - continue
        if (StringUtils.isEmpty(movieName) || StringUtils.isEmpty(movieId)) {
            continue;
        }

        MediaSearchResult sr = new MediaSearchResult(providerInfo.getId());
        sr.setTitle(movieName);
        sr.setIMDBId(movieId);
        sr.setYear(year);
        sr.setPosterUrl(posterUrl);

        // populate extra args
        MetadataUtil.copySearchQueryToSearchResult(query, sr);

        if (movieId.equals(query.get(SearchParam.IMDBID))) {
            // perfect match
            sr.setScore(1);
        } else {
            // compare score based on names
            float score = MetadataUtil.calculateScore(searchTerm, movieName);
            if (posterUrl.isEmpty() || posterUrl.contains("nopicture")) {
                LOGGER.debug("no poster - downgrading score by 0.01");
                score = score - 0.01f;
            }
            if (myear != null && !myear.isEmpty() && !myear.equals("0") && !myear.equals(year)) {
                LOGGER.debug("parsed year does not match search result year - downgrading score by 0.01");
                score = score - 0.01f;
            }
            sr.setScore(score);
        }

        result.add(sr);

        // only get 40 results
        if (result.size() >= 40) {
            break;
        }
    }
    Collections.sort(result);
    Collections.reverse(result);

    return result;
}