List of usage examples for org.jsoup.nodes Document getElementsByClass
public Elements getElementsByClass(String className)
From source file:com.zacwolf.commons.email.Email.java
private void prepare(final org.jsoup.nodes.Document doc) { removeComments(doc);//Remove any comments from the html of the message to reduce the size //Change the title to match the subject of the email if (doc.getElementsByTag("title").size() > 0) doc.getElementsByTag("title").first().html(getSubject()); //Replace the contents of any tags with class="date" with the current date if (doc.getElementsByClass("date").size() > 0) { for (org.jsoup.nodes.Element datelem : doc.getElementsByClass("date")) { SimpleDateFormat df = new SimpleDateFormat("MMMMMMMMMM d, yyyy"); if (datelem.hasAttr("format")) { try { df = new SimpleDateFormat(datelem.attr("format")); } catch (Exception ee) { } //throw it away and just go back to the default format; datelem.html(df.format(TimeUtils.getGMTtime())); }/*w w w . j av a2 s .co m*/ } } //tables need the border-spacing: style attribute; added for GMail compatiblity for (org.jsoup.nodes.Element tbl : doc.getElementsByTag("table")) if (!tbl.attr("style").contains("border-spacing:")) tbl.attr("style", tbl.attr("style") + (!tbl.attr("style").endsWith(";") ? ";" : "") + "border-spacing:0;"); }
From source file:com.shahnami.fetch.Controller.FetchMovies.java
public List<Movie> getBollyMovies(String query) { try {/*from w w w . j ava2s.c o m*/ Document doc; Elements searchDetails; String link; String title; String image = null; Elements linkAndTitles; Document movieDetails; double rating = 0; String magnetLink; String torrentFile; NumberFormat formatter; String output; if (query.equalsIgnoreCase("")) { // } else { doc = Jsoup.connect("https://1337x.to/search/" + URLEncoder.encode(query, "UTF-8") + "+hindi/1/") .userAgent("Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.7.2) Gecko/20040803").get(); searchDetails = doc.getElementsByClass("coll-1"); for (Element e : searchDetails) { linkAndTitles = e.getElementsByTag("strong"); for (Element e1 : linkAndTitles) { link = "https://1337x.to" + e1.getElementsByTag("a").first().attr("href"); title = e1.getElementsByTag("a").first().html(); if (!link.contains("/mirror")) { Movie m = new Movie(); m.setTitle(title.replace("<b>", "").replace("</b>", "").trim()); //.substring(0, 47)+ "..." m.setLanguage("Hindi"); m.setUrl(link); Pattern pattern = Pattern.compile(".*([\\s(]+[0-9]{4}[\\s)]+).*"); Matcher matcher = pattern.matcher(title); while (matcher.find()) { m.setYear(Integer .parseInt(matcher.group(1).replace("(", "").replace(")", "").trim())); } movieDetails = Jsoup.connect(link).get(); try { image = movieDetails.getElementsByClass("moive-box").first().getElementsByTag("img") .first().attr("src"); rating = Float.parseFloat( movieDetails.getElementsByClass("rateing").first().getElementsByTag("i") .attr("style").split(":")[1].replace("%;", "").trim()); } catch (Exception ex) { // } magnetLink = movieDetails.getElementsByClass("magnet").first().attr("href"); torrentFile = movieDetails.getElementsByClass("torrent").first().attr("href"); formatter = NumberFormat.getNumberInstance(); formatter.setMinimumFractionDigits(2); formatter.setMaximumFractionDigits(2); output = formatter.format(rating / 10); rating = Double.parseDouble(output); if (rating < 1) { rating = 0; } m.setRating(rating); 
m.setSmall_cover_image(image); List<Torrent> torrents = new ArrayList<>(); Torrent t = new Torrent(); t.setUrl(magnetLink); Torrent t2 = new Torrent(); t2.setUrl(torrentFile); torrents.add(t); torrents.add(t2); m.setTorrents(torrents); m.getTorrents().get(0).setSeeds( Integer.valueOf(movieDetails.getElementsByClass("green").first().text())); m.getTorrents().get(0).setPeers( Integer.valueOf(movieDetails.getElementsByClass("red").first().text())); m.setIsBollywood(true); m.setSize(movieDetails.getElementsByClass("list").first().getElementsByTag("li").get(3) .text().substring(10).trim()); _movies.add(m); } } //String link = linkAndTitle.getElementsByAttribute("href").first().text(); //System.out.println(link); //String title = linkAndTitle.getElementsByTag("b").text(); //System.out.println(title); } } } catch (UnsupportedEncodingException ex) { Logger.getLogger(FetchMovies.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(FetchMovies.class.getName()).log(Level.SEVERE, null, ex); } return _movies; }
From source file:eu.masconsult.bgbanking.banks.dskbank.DskClient.java
@Override public String authenticate(String username, String password) throws IOException, ParseException, CaptchaException { final HttpResponse resp; final ArrayList<NameValuePair> params = new ArrayList<NameValuePair>(); params.add(new BasicNameValuePair(PARAM_USERNAME, username)); params.add(new BasicNameValuePair(PARAM_PASSWORD, password)); final HttpEntity entity; try {//from ww w . j a v a 2s .com entity = new UrlEncodedFormEntity(params); } catch (final UnsupportedEncodingException e) { // this should never happen. throw new IllegalStateException(e); } String uri = BASE_URL + "?" + URLEncodedUtils.format(Arrays.asList(new BasicNameValuePair(XML_ID, AUTH_XML_ID)), ENCODING); Log.i(TAG, "Authenticating to: " + uri); final HttpPost post = new HttpPost(uri); post.addHeader(entity.getContentType()); post.setHeader("Accept", "*/*"); post.setEntity(entity); try { resp = getHttpClient().execute(post); if (resp.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { throw new ParseException("login: unhandled http status " + resp.getStatusLine().getStatusCode() + " " + resp.getStatusLine().getReasonPhrase()); } String response = EntityUtils.toString(resp.getEntity()); Log.v(TAG, "response = " + response); Document doc = Jsoup.parse(response, BASE_URL); Element mainForm = doc.getElementById("mainForm"); if (mainForm == null) { throw new ParseException("login: missing mainForm"); } String action = BASE_URL + mainForm.attr("action"); Log.v(TAG, "action=" + action); UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(action); String user_id = sanitizer.getValue(PARAM_USER_ID); String session_id = sanitizer.getValue(PARAM_SESSION_ID); if (user_id == null || "".equals(user_id) || session_id == null || "".equals(session_id)) { if (doc.getElementsByClass("redtext").size() > 0) { // bad authentication return null; } else { // TODO handle captcha Elements captcha = doc.select("input[name=captcha_hkey]"); if (captcha != null && captcha.size() == 1) { String captchaHash = 
captcha.first().attr("value"); String captchaUri = BASE_URL + "?" + URLEncodedUtils .format(Arrays.asList(new BasicNameValuePair(XML_ID, CAPTCHA_XML_ID), new BasicNameValuePair("captcha_key", captchaHash)), ENCODING); throw new CaptchaException(captchaUri); } throw new ParseException("no user_id or session_id: " + action); } } return URLEncodedUtils.format(Arrays.asList(new BasicNameValuePair(PARAM_USER_ID, user_id), new BasicNameValuePair(PARAM_SESSION_ID, session_id)), ENCODING); } catch (ClientProtocolException e) { throw new IOException(e.getMessage()); } }
From source file:jp.mau.twappremover.MainActivity.java
private void getApps() { _apps.clear();/* www . ja v a 2 s .c om*/ HttpGet request = new HttpGet(APP_PAGE); request.addHeader("User-Agent", USER_AGENT); request.addHeader("Cookie", "_twitter_sess=" + _session_id + "; auth_token=" + _cookie_auth); try { String result = _client.execute(request, new ResponseHandler<String>() { @Override public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException { switch (response.getStatusLine().getStatusCode()) { case HttpStatus.SC_OK: return EntityUtils.toString(response.getEntity(), "UTF-8"); case HttpStatus.SC_NOT_FOUND: throw new RuntimeException("not found"); default: throw new RuntimeException("error"); } } }); Document doc = null; doc = Jsoup.parse(result); // parse top page and get authenticity token Elements forms = doc.getElementsByTag("form"); for (Element e : forms) { Elements auths = e.getElementsByAttributeValue("name", "authenticity_token"); if (auths.size() > 0) { _auth_token = auths.get(0).attr("value"); break; } } Elements apps = doc.getElementsByClass("app"); for (Element e : apps) { LinkedApp app = new LinkedApp(); if (e.getElementsByTag("strong").size() > 0) app.name = e.getElementsByTag("strong").get(0).text(); if (e.getElementsByClass("creator").size() > 0) app.creator = e.getElementsByClass("creator").get(0).text(); if (e.getElementsByClass("description").size() > 0) app.desc = e.getElementsByClass("description").get(0).text(); if (e.getElementsByClass("app-img").size() > 0) app.imgUrl = e.getElementsByClass("app-img").get(0).attr("src"); if (e.getElementsByClass("revoke").size() > 0) { String tmp = e.getElementsByClass("revoke").get(0).attr("id"); app.revokeId = tmp.replaceAll(KEY_HEADER_REVOKE, ""); } else { // revoke id ????(facebook????????) continue; } _apps.add(app); } _handler.post(new Runnable() { @Override public void run() { _appadapter.notifyDataSetChanged(); } }); } catch (Exception ex) { ex.printStackTrace(); } }
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
/**
 * Prolongs a single lent item.
 *
 * <p>Re-runs {@code account(account)} first when the session is older than
 * {@code SESSION_LIFETIME}, nobody is logged in, or a different account is logged in.
 * With {@code ACTION_CONFIRMATION} the confirmation form is posted right away.
 * Otherwise the page {@code opac_url + "/" + a} is loaded: a single
 * {@code kontomeldung} element is reported as an error, a confirmation table is
 * returned as {@code CONFIRMATION_NEEDED} with its field/value rows, and a page with
 * {@code #verlaengern} but no such table is confirmed immediately.
 *
 * @param a          path of the prolong page, relative to {@code opac_url}
 * @param account    the account the item belongs to
 * @param useraction the step of the multi-step flow being executed
 * @param selection  not read by this implementation
 * @return the outcome of the prolong attempt
 * @throws IOException if a request fails
 */
@Override
public ProlongResult prolong(String a, Account account, int useraction, String selection)
        throws IOException {
    if (!initialised) {
        start();
    }

    // The short-circuit order guarantees logged_in_as is non-null before getId() is read.
    if (System.currentTimeMillis() - logged_in > SESSION_LIFETIME || logged_in_as == null
            || logged_in_as.getId() != account.getId()) {
        try {
            account(account);
        } catch (JSONException e) {
            e.printStackTrace();
            return new ProlongResult(MultiStepResult.Status.ERROR,
                    stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
        } catch (OpacErrorException e) {
            return new ProlongResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
    }

    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        postProlongConfirmation();
        return new ProlongResult(MultiStepResult.Status.OK);
    }

    final String page = httpGet(opac_url + "/" + a, getDefaultEncoding());
    final Document doc = Jsoup.parse(page);

    if (doc.getElementsByClass("kontomeldung").size() == 1) {
        return new ProlongResult(MultiStepResult.Status.ERROR,
                doc.getElementsByClass("kontomeldung").get(0).text());
    }

    if (doc.select("#verlaengern").size() != 1) {
        return new ProlongResult(MultiStepResult.Status.ERROR, "??");
    }

    if (doc.select(".kontozeile_center table").size() != 1) {
        // No details table to show: confirm directly.
        postProlongConfirmation();
        return new ProlongResult(MultiStepResult.Status.OK);
    }

    // Collect the label/value pairs of the confirmation table for the caller.
    final Element confirmationTable = doc.select(".kontozeile_center table").first();
    final ProlongResult confirmation = new ProlongResult(MultiStepResult.Status.CONFIRMATION_NEEDED);
    final List<String[]> rows = new ArrayList<>();
    for (Element tr : confirmationTable.select("tr")) {
        final boolean hasLabel = tr.select(".konto_feld").size() == 1;
        if (hasLabel && tr.select(".konto_feldinhalt").size() == 1) {
            rows.add(new String[] { tr.select(".konto_feld").text().trim(),
                    tr.select(".konto_feldinhalt").text().trim() });
        }
    }
    confirmation.setDetails(rows);
    return confirmation;
}

/** Posts the form that confirms a pending prolongation to {@code /index.asp}. */
private void postProlongConfirmation() throws IOException {
    final List<NameValuePair> form = new ArrayList<>(2);
    form.add(new BasicNameValuePair("target", "make_vl"));
    // NOTE(review): this value looks like it lost an umlaut somewhere ("Bestätigung"?);
    // it is kept byte-for-byte here - confirm against what the server expects.
    form.add(new BasicNameValuePair("verlaengern", "Besttigung"));
    httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(form), getDefaultEncoding());
}
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
@Override public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException { if (!initialised) { start();//www .j ava 2s.com } if (System.currentTimeMillis() - logged_in > SESSION_LIFETIME || logged_in_as == null) { try { account(account); } catch (JSONException e) { e.printStackTrace(); return new ProlongAllResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.CONNECTION_ERROR)); } catch (OpacErrorException e) { return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage()); } } else if (logged_in_as.getId() != account.getId()) { try { account(account); } catch (JSONException e) { e.printStackTrace(); return new ProlongAllResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.CONNECTION_ERROR)); } catch (OpacErrorException e) { return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage()); } } String html = httpGet(opac_url + "/index.asp?target=alleverl", getDefaultEncoding()); Document doc = Jsoup.parse(html); if (doc.getElementsByClass("kontomeldung").size() == 1) { String err = doc.getElementsByClass("kontomeldung").get(0).text(); return new ProlongAllResult(MultiStepResult.Status.ERROR, err); } if (doc.select(".kontozeile table").size() == 1) { Map<Integer, String> colmap = new HashMap<>(); List<Map<String, String>> result = new ArrayList<>(); for (Element tr : doc.select(".kontozeile table tr")) { if (tr.select(".tabHeaderKonto").size() > 0) { int i = 0; for (Element th : tr.select("th")) { if (th.text().contains("Verfasser")) { colmap.put(i, OpacApi.ProlongAllResult.KEY_LINE_AUTHOR); } else if (th.text().contains("Titel")) { colmap.put(i, OpacApi.ProlongAllResult.KEY_LINE_TITLE); } else if (th.text().contains("Neue")) { colmap.put(i, OpacApi.ProlongAllResult.KEY_LINE_NEW_RETURNDATE); } else if (th.text().contains("Frist")) { colmap.put(i, OpacApi.ProlongAllResult.KEY_LINE_OLD_RETURNDATE); } else if (th.text().contains("Status")) { colmap.put(i, 
OpacApi.ProlongAllResult.KEY_LINE_MESSAGE); } i++; } } else { Map<String, String> line = new HashMap<>(); for (Entry<Integer, String> entry : colmap.entrySet()) { line.put(entry.getValue(), tr.child(entry.getKey()).text().trim()); } result.add(line); } } if (doc.select("input#make_allvl").size() > 0) { List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("target", "make_allvl_flag")); nameValuePairs.add(new BasicNameValuePair("make_allvl", "Bestaetigung")); httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); } return new ProlongAllResult(MultiStepResult.Status.OK, result); } return new ProlongAllResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.INTERNAL_ERROR)); }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
protected LoginResponse login(Account acc) throws OpacErrorException, IOException { String html;/* www. j a v a 2 s.c o m*/ List<NameValuePair> nameValuePairs = new ArrayList<>(); try { httpGet(opac_url + "/login.do", ENCODING); } catch (IOException e1) { e1.printStackTrace(); } nameValuePairs.add(new BasicNameValuePair("username", acc.getName())); nameValuePairs.add(new BasicNameValuePair("password", acc.getPassword())); nameValuePairs.add(new BasicNameValuePair("CSId", CSId)); nameValuePairs.add(new BasicNameValuePair("methodToCall", "submit")); nameValuePairs.add(new BasicNameValuePair("login_action", "Login")); html = httpPost(opac_url + "/login.do", new UrlEncodedFormEntity(nameValuePairs), ENCODING); Document doc = Jsoup.parse(html); if (doc.getElementsByClass("alert").size() > 0) { if (doc.select(".alert").text().contains("Nutzungseinschr") && doc.select("a[href*=methodToCall=done]").size() > 0) { // This is a warning that we need to acknowledge, it will be shown in the account // view httpGet(opac_url + "/login.do?methodToCall=done", ENCODING); logged_in = System.currentTimeMillis(); logged_in_as = acc; return new LoginResponse(true, doc.getElementsByClass("alert").get(0).text()); } else { throw new OpacErrorException(doc.getElementsByClass("alert").get(0).text()); } } logged_in = System.currentTimeMillis(); logged_in_as = acc; return new LoginResponse(true); }
From source file:info.smartkit.hairy_batman.query.SogouSearchQuery.java
public void parseWxOpenId() { Document doc; try {// ww w. ja v a2s. co m // need http protocol // doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE+ wxFoo.getSubscribeId()).get(); doc = Jsoup.connect("http://weixin.sogou.com/weixin?type=1&query=" + wxFoo.getSubscribeId() + "&fr=sgsearch&ie=utf8&_ast=1423915648&_asf=null&w=01019900&cid=null&sut=19381").get(); LOG.debug("openID html INFO:" + doc.html()); // get page title String title = doc.title(); LOG.debug("title : " + title); // get all "?:" value of html <span> //Elements openIdLink = doc.select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELEMENTS).select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELE_IDENTITY); Elements openIdLink = doc.getElementsByClass("wx-rb"); Element a = null; String openIdLinkHref = ""; if (openIdLink != null && openIdLink.size() > 0) { Iterator<Element> itea = openIdLink.iterator(); while (itea.hasNext()) { a = itea.next(); LOG.debug("openID html INFO:" + a.html()); if (a.getElementsByTag("em").html().indexOf(wxFoo.getSubscribeId()) != -1) { break; } } } if (a != null) { openIdLinkHref = a.attr("href"); } LOG.debug("openIdLinkHref:" + openIdLinkHref); // FIXME:???? if (this.wxFoo.getOpenId() == null && openIdLinkHref.length() > 0) { this.wxFoo.setOpenId(openIdLinkHref.split(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_KEYWORDS)[1]); LOG.info("saved wxOpenId value: " + this.wxFoo.getOpenId()); GlobalVariables.wxFooListWithOpenId.add(this.wxFoo); // File reporting new FileReporter(GlobalConsts.REPORT_FILE_OUTPUT_OPENID, GlobalVariables.wxFooListWithOpenId, FileReporter.REPORTER_TYPE.R_T_OPENID, FileReporter.REPORTER_FILE_TYPE.EXCEL).write(); // Then,OpenID JSON site parse if (this.wxFoo.getOpenId() != null) { // Save openId to DB. 
try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), this.wxFoo.getOpenId() }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } else { LOG.warn("SogouSearchQuery getOpenId Failure! site info:" + wxFoo.getCode()); // TODO write those info to File or DB for collect which // agency not open weixin service // Save openId to DB. try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), "" }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); LOG.warn("Can not get subsriber info: " + this.wxFoo.getCode()); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } } } catch (IOException e) { // e.printStackTrace(); LOG.error(e.toString()); } }
From source file:faescapeplan.FAEscapePlanUI.java
private void downloadProfile() { try {//ww w . j a v a2 s . c o m Document userPage = Jsoup.connect("http://www.furaffinity.net/user/" + userData.getName() + "/") .cookies(userData.getCookies()).userAgent(USER_AGENT).get(); String body = userPage.getElementsByClass("ldot").get(0).html(); Path profilePath = Paths .get(this.saveLocText.getText() + "\\" + userData.getName() + "\\userprofile.txt"); String parsedBody = removeHtmlTags(body); try (FileWriter profileWriter = new FileWriter(new File(profilePath.toString()))) { profileWriter.write(parsedBody); } } catch (IOException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:faescapeplan.FAEscapePlanUI.java
private void getProfileImg() { try {//from w ww.j a v a 2s . c om Document doc = Jsoup.connect("http://www.furaffinity.net/user/" + userData.getName()) .cookies(userData.getCookies()).userAgent(USER_AGENT).get(); String iconLink = "http:" + doc.getElementsByClass("avatar").get(0).attr("src"); Response iconResponse = Jsoup.connect(iconLink).cookies(userData.getCookies()).userAgent(USER_AGENT) .maxBodySize(0).ignoreContentType(true).execute(); String iconPath = tempPath + userData.getName() + ".gif"; try (FileOutputStream userIcon = new FileOutputStream(new File(iconPath))) { userIcon.write(iconResponse.bodyAsBytes()); } ImageIcon icon = new ImageIcon(iconPath); this.iconDisplay.setIcon(icon); } catch (IOException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); } }