List of usage examples for org.jsoup.nodes Element attr
public String attr(String attributeKey)
From source file:me.vertretungsplan.parser.LoginHandler.java
private String handleLogin(Executor executor, CookieStore cookieStore, boolean needsResponse) throws JSONException, IOException, CredentialInvalidException { if (auth == null) return null; if (!(auth instanceof UserPasswordCredential || auth instanceof PasswordCredential)) { throw new IllegalArgumentException("Wrong authentication type"); }//from w w w . j av a 2 s . c om String login; String password; if (auth instanceof UserPasswordCredential) { login = ((UserPasswordCredential) auth).getUsername(); password = ((UserPasswordCredential) auth).getPassword(); } else { login = null; password = ((PasswordCredential) auth).getPassword(); } JSONObject data = scheduleData.getData(); JSONObject loginConfig = data.getJSONObject(LOGIN_CONFIG); String type = loginConfig.optString(PARAM_TYPE, "post"); switch (type) { case "post": List<Cookie> cookieList = cookieProvider != null ? cookieProvider.getCookies(auth) : null; if (cookieList != null && !needsResponse) { for (Cookie cookie : cookieList) cookieStore.addCookie(cookie); String checkUrl = loginConfig.optString(PARAM_CHECK_URL, null); String checkText = loginConfig.optString(PARAM_CHECK_TEXT, null); if (checkUrl != null && checkText != null) { String response = executor.execute(Request.Get(checkUrl)).returnContent().asString(); if (!response.contains(checkText)) { return null; } } else { return null; } } executor.clearCookies(); Document preDoc = null; if (loginConfig.has(PARAM_PRE_URL)) { String preUrl = loginConfig.getString(PARAM_PRE_URL); String preHtml = executor.execute(Request.Get(preUrl)).returnContent().asString(); preDoc = Jsoup.parse(preHtml); } String postUrl = loginConfig.getString(PARAM_URL); JSONObject loginData = loginConfig.getJSONObject(PARAM_DATA); List<NameValuePair> nvps = new ArrayList<>(); String typo3Challenge = null; if (loginData.has("_hiddeninputs") && preDoc != null) { for (Element hidden : preDoc.select(loginData.getString("_hiddeninputs") + " input[type=hidden]")) { nvps.add(new 
BasicNameValuePair(hidden.attr("name"), hidden.attr("value"))); if (hidden.attr("name").equals("challenge")) { typo3Challenge = hidden.attr("value"); } } } for (String name : JSONObject.getNames(loginData)) { String value = loginData.getString(name); if (name.equals("_hiddeninputs")) continue; switch (value) { case "_login": value = login; break; case "_password": value = password; break; case "_password_md5": value = DigestUtils.md5Hex(password); break; case "_password_md5_typo3": value = DigestUtils.md5Hex(login + ":" + DigestUtils.md5Hex(password) + ":" + typo3Challenge); break; } nvps.add(new BasicNameValuePair(name, value)); } Request request = Request.Post(postUrl); if (loginConfig.optBoolean("form-data", false)) { MultipartEntityBuilder builder = MultipartEntityBuilder.create(); for (NameValuePair nvp : nvps) { builder.addTextBody(nvp.getName(), nvp.getValue()); } request.body(builder.build()); } else { request.bodyForm(nvps, Charset.forName("UTF-8")); } String html = executor.execute(request).returnContent().asString(); if (cookieProvider != null) cookieProvider.saveCookies(auth, cookieStore.getCookies()); String checkUrl = loginConfig.optString(PARAM_CHECK_URL, null); String checkText = loginConfig.optString(PARAM_CHECK_TEXT, null); if (checkUrl != null && checkText != null) { String response = executor.execute(Request.Get(checkUrl)).returnContent().asString(); if (response.contains(checkText)) throw new CredentialInvalidException(); } else if (checkText != null) { if (html.contains(checkText)) throw new CredentialInvalidException(); } return html; case "basic": if (login == null) throw new IOException("wrong auth type"); executor.auth(login, password); if (loginConfig.has(PARAM_URL)) { String url = loginConfig.getString(PARAM_URL); if (executor.execute(Request.Get(url)).returnResponse().getStatusLine().getStatusCode() != 200) { throw new CredentialInvalidException(); } } break; case "ntlm": if (login == null) throw new IOException("wrong auth type"); 
executor.auth(login, password, null, null); if (loginConfig.has(PARAM_URL)) { String url = loginConfig.getString(PARAM_URL); if (executor.execute(Request.Get(url)).returnResponse().getStatusLine().getStatusCode() != 200) { throw new CredentialInvalidException(); } } break; case "fixed": String loginFixed = loginConfig.optString(PARAM_LOGIN, null); String passwordFixed = loginConfig.getString(PARAM_PASSWORD); if (!Objects.equals(loginFixed, login) || !Objects.equals(passwordFixed, password)) { throw new CredentialInvalidException(); } break; } return null; }
From source file:com.astamuse.asta4d.web.form.flow.base.BasicFormFlowSnippetTrait.java
/**
 * Builds the renderer that rewrites array index place holders in the attributes of cascade array form
 * fields. Sub classes may override this method to customize the rewriting.
 *
 * <p>This default implementation selects every element carrying one of the attributes returned by
 * {@code rewriteCascadeFormFieldArrayRefTargetAttrs()} and replaces each non-empty value of those
 * attributes with the result of {@code rewriteArrayIndexPlaceHolder(value, indexes)}.
 *
 * @param renderTargetStep the step being rendered (not used by this default implementation)
 * @param form             the form being rendered (not used by this default implementation)
 * @param indexes          the array indexes handed to {@code rewriteArrayIndexPlaceHolder}
 * @return the renderer performing the attribute rewriting
 */
default Renderer rewriteCascadeFormFieldArrayRef(final String renderTargetStep, final Object form,
        final int[] indexes) {
    final String[] targetAttrs = rewriteCascadeFormFieldArrayRefTargetAttrs();

    // One attribute selector per target attribute, later combined into a single selector list.
    final String[] selectors = new String[targetAttrs.length];
    int pos = 0;
    for (String targetAttr : targetAttrs) {
        selectors[pos++] = SelectorUtil.attr(targetAttr);
    }
    final String combinedSelector = StringUtils.join(selectors, ",");

    final ElementSetter indexRewriter = new ElementSetter() {
        @Override
        public void set(Element elem) {
            for (String attrName : targetAttrs) {
                final String current = elem.attr(attrName);
                if (!StringUtils.isNotEmpty(current)) {
                    continue;
                }
                elem.attr(attrName, rewriteArrayIndexPlaceHolder(current, indexes));
            }
        }
    };
    return Renderer.create(combinedSelector, indexRewriter);
}
From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java
private HttpPost getForm(final HttpContext httpContext, final HttpResponse response, final String user, final String password) throws IllegalStateException, IOException { final HttpEntity entity = response.getEntity(); final HttpHost host = (HttpHost) httpContext.getAttribute(ExecutionContext.HTTP_TARGET_HOST); final String requestUri = getLatestRedirectedUrl(httpContext); final String requestBaseUrl = requestUri.substring(0, requestUri.lastIndexOf('/')); final String userFormId = getHtmlElementId(host, FormId.USER); final String passFormId = getHtmlElementId(host, FormId.PASS); final String loginFormId = getHtmlElementId(host, FormId.LOGIN_FORM); final String loginButton = getSsoErrorHandler(host).getSsoLoginButtonName(); log.debug("Looking for HTML input form retrieved from " + requestUri); final List<NameValuePair> formNvps = new ArrayList<NameValuePair>(); final Document doc = Jsoup.parse(entity.getContent(), "UTF-8", requestBaseUrl); final org.jsoup.nodes.Element form = doc .select("form" + (loginFormId == null ? "" : "[id=" + loginFormId + "]")).first(); final String formAction = form.attr("action"); final HttpPost formPost = new HttpPost(getUrl(requestBaseUrl, formAction)); final Elements formFields = form.select("input"); for (final Element element : formFields) { final String fieldName = element.attr("name"); String fieldValue = element.attr("value"); final String fieldId = element.attr("id"); log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", fieldName, fieldValue, fieldId));//from ww w . j av a 2s . 
com if (fieldId.equalsIgnoreCase(userFormId)) { fieldValue = user; log.debug(String.format("Set formField user='%s'", user)); } else if (fieldId.equalsIgnoreCase(passFormId)) { log.debug("Set formField password='*******'"); fieldValue = password; } if (loginButton != null && element.attr("type").equalsIgnoreCase("submit")) { if (element.attr("name").equalsIgnoreCase(loginButton)) { formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } } else { formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } } formPost.setEntity(new UrlEncodedFormEntity(formNvps, "UTF-8")); return formPost; }
From source file:com.mythesis.userbehaviouranalysis.WebParser.java
/** * Parse the url and get all the content * @param link the url to parse/*ww w . ja v a 2 s . co m*/ * @return The content parsed */ private String cleanhtml(String link) { try { Document doc = Jsoup.connect(link).timeout(10 * 1000).get(); String title = doc.title(); String mainbody = doc.body().text(); Elements links = doc.select("a[href]"); Elements media = doc.select("[src]"); //fix link html to remove https:// or http:// and simple / if (link.substring(link.length() - 1, link.length()).equalsIgnoreCase("/")) { link = link.substring(0, link.length() - 1); } if (link.substring(0, 5).equalsIgnoreCase("https")) { link = link.substring(8); } else if (link.substring(0, 4).equalsIgnoreCase("http")) { link = link.substring(7); } String anchortext = ""; String alttext = ""; //-----get the anchor text of internal links for (Element el : links) { String str_check = el.attr("abs:href"); if (el.attr("abs:href").contains(link) && el.text().length() > 1) { anchortext = anchortext + el.text() + " "; } } //-------get alt text to internal images links for (Element medi : media) { if (medi.getElementsByTag("img").attr("src").contains(link)) { alttext = alttext + " " + medi.getElementsByTag("img").attr("alt"); } if (medi.getElementsByTag("img").attr("src").startsWith("/")) { alttext = alttext + " " + medi.getElementsByTag("img").attr("alt"); } } String content = mainbody + title + anchortext + alttext; return content; } catch (IOException ex) { Logger.getLogger(WebParser.class.getName()).log(Level.SEVERE, null, ex); String check = null; return check; } catch (NullPointerException ex) { Logger.getLogger(WebParser.class.getName()).log(Level.SEVERE, null, ex); String check = null; return check; } catch (Exception ex) { Logger.getLogger(WebParser.class.getName()).log(Level.SEVERE, null, ex); String check = null; return check; } }
From source file:org.confab.PhpBB3Parser.java
/** * Parses each post for a particular topic. * @param html Html containing the posts to be parsed * @return List of Post objects *//*from www . j a v a 2 s . c o m*/ public List<Post> parsePosts(Document html, ForumThread parent) { Utilities.debug("Starting parsePosts"); List<Post> ret = new ArrayList<Post>(); // Each post should have it's own table Elements div_posts = html.select("div#posts"); assert !div_posts.isEmpty(); Elements posts_table = div_posts.select("table[id~=(post\\d+)]"); assert !posts_table.isEmpty(); for (Element el_post : posts_table) { Post new_post = new Post(parent); // Get post id (id=post\d+) new_post.id = el_post.attr("id").replace("post", "").trim(); assert new_post.id != null; // Get post message Elements el_message = el_post.select("div[id~=(post_message_\\d+)]"); assert !el_message.isEmpty(); new_post.message = el_message.first().text(); assert new_post.message != null; Utilities.debug("new_post.message: " + new_post.message); // Get post author Elements el_author = el_post.select(".bigusername"); assert !el_author.isEmpty(); new_post.author.username = el_author.first().text(); assert new_post.author != null; Utilities.debug("new_post.author: " + new_post.author); ret.add(new_post); } Utilities.debug("Finished parsePosts"); return ret; }
From source file:me.vertretungsplan.parser.UntisInfoParser.java
/**
 * Returns the number of rows a table cell spans.
 *
 * @param cell the table cell to inspect
 * @return the parsed value of the {@code rowspan} attribute, or 1 if the attribute is absent or is not
 * a valid integer
 */
private int getRowspan(Element cell) {
    if (!cell.hasAttr("rowspan")) {
        return 1;
    }
    try {
        return Integer.parseInt(cell.attr("rowspan").trim());
    } catch (NumberFormatException ignored) {
        // An empty or malformed rowspan (e.g. rowspan="") is treated like a missing one.
        return 1;
    }
}
From source file:mobi.jenkinsci.ci.client.JenkinsClient.java
/**
 * Reads a URL from an attribute of an element.
 *
 * @param issueLink the element carrying the link, may be {@code null}
 * @param attr      name of the attribute holding the URL
 * @return the attribute value as a URL (values not starting with {@code "http"} are resolved against
 * the element's base URI), or {@code null} if the element is {@code null} or the attribute is missing
 * or empty
 * @throws MalformedURLException if the attribute value or the base URI is not a valid URL
 */
private URL getUrl(final Element issueLink, final String attr) throws MalformedURLException {
    if (issueLink == null) {
        return null;
    }

    final String linkUrl = issueLink.attr(attr);
    // jsoup reports a missing attribute as an empty string, so a null check alone never triggers.
    if (linkUrl == null || linkUrl.isEmpty()) {
        return null;
    }

    if (linkUrl.startsWith("http")) {
        return new URL(linkUrl);
    }
    final URL baseUrl = new URL(issueLink.baseUri());
    return new URL(baseUrl, linkUrl);
}
From source file:com.astamuse.asta4d.web.form.flow.base.AbstractFormFlowSnippet.java
/**
 * Builds the renderer that rewrites array index place holders in the attributes of cascade array form
 * fields. Sub classes may override this method to customize the rewriting.
 *
 * <p>This implementation selects every element carrying one of the attributes returned by
 * {@code rewriteCascadeFormFieldArrayRefTargetAttrs()} and replaces each non-empty value of those
 * attributes with the result of {@code rewriteArrayIndexPlaceHolder(value, cascadeFormArrayIndex)}.
 *
 * @param renderTargetStep      the step being rendered (not used by this implementation)
 * @param form                  the form being rendered (not used by this implementation)
 * @param cascadeFormArrayIndex the array index handed to {@code rewriteArrayIndexPlaceHolder}
 * @return the renderer performing the attribute rewriting
 */
protected Renderer rewriteCascadeFormFieldArrayRef(final String renderTargetStep, final Object form,
        final int cascadeFormArrayIndex) {
    final String[] targetAttrs = rewriteCascadeFormFieldArrayRefTargetAttrs();

    // One attribute selector per target attribute, later combined into a single selector list.
    final String[] selectors = new String[targetAttrs.length];
    int pos = 0;
    for (String targetAttr : targetAttrs) {
        selectors[pos++] = SelectorUtil.attr(targetAttr);
    }
    final String combinedSelector = StringUtils.join(selectors, ",");

    final ElementSetter indexRewriter = new ElementSetter() {
        @Override
        public void set(Element elem) {
            for (String attrName : targetAttrs) {
                final String current = elem.attr(attrName);
                if (!StringUtils.isNotEmpty(current)) {
                    continue;
                }
                elem.attr(attrName, rewriteArrayIndexPlaceHolder(current, cascadeFormArrayIndex));
            }
        }
    };
    return Renderer.create(combinedSelector, indexRewriter);
}
From source file:de.geeksfactory.opacclient.apis.Littera.java
protected void addSortingSearchFields(List<SearchField> fields) throws IOException, JSONException { final String html = httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()); final Document doc = Jsoup.parse(html); for (int i = 0; i < 3; i++) { final Element tr = doc.select("#sort_editor tr.sort_" + i).first(); final DropdownSearchField field = new DropdownSearchField(); field.setMeaning(SearchField.Meaning.ORDER); field.setId("sort_" + i); field.setDisplayName(tr.select("td").first().text()); field.addDropdownValue("", ""); for (final Element option : tr.select(".crit option")) { if (option.hasAttr("selected")) { field.addDropdownValue(0, option.attr("value"), option.text()); } else { field.addDropdownValue(option.attr("value"), option.text()); }//ww w . j a v a 2 s. c o m } fields.add(field); } }
From source file:org.confab.VBulletinParser.java
public List<Forum> parseForums(Document root, BulletinBoard parent) { Utilities.debug("parseForums"); List<Forum> ret = new ArrayList<Forum>(); // get table//from w w w . j a va2 s . c om Elements forum_table = root.select("tbody[id*=collapseobj_forumbit_] tr"); assert !forum_table.isEmpty(); for (Element el_tr : forum_table) { Forum new_forum = new Forum(parent); // Get the table data for this row Elements el_tds = el_tr.select("td"); assert !el_tds.isEmpty() : el_tr.html(); // xbox360achievements has a lot of subforums and puts these in their own table // The <a>'s are picked up as children of the parent <td> so don't parse this sub- // tables row's seperatly if (!el_tds.select("td.thead").isEmpty() || el_tds.size() < 3) { //Utilities.debug("tr doesn't seem to have anything we want, skipping."); continue; } // Get the title URL Elements els_a = el_tds.get(1).select("a"); assert !els_a.isEmpty() : el_tds.html(); new_forum.url = els_a.first().attr("href"); assert new_forum.url != null; Utilities.debug("new_forum.url : " + new_forum.url); // Get the title text assert els_a.first() != null; new_forum.title = els_a.first().text(); assert new_forum.title != null; Utilities.debug("new_forum.title : " + new_forum.title); // Check for any subforums in remaining a elements els_a.remove(els_a.first()); for (Element el_a : els_a) { Forum sub_forum = new Forum(parent); sub_forum.url = el_a.attr("href"); assert sub_forum.url != null; sub_forum.title = el_a.text(); assert sub_forum.title != null; new_forum.subForums.add(sub_forum); Utilities.debug("added subForum: " + sub_forum.title); } // Get num viewing the current forum Element el_viewing = el_tr.select(":matchesOwn((\\d+ Viewing))").first(); if (el_viewing != null) { new_forum.numViewing = el_viewing.text(); } else { new_forum.numViewing = "0"; } Utilities.debug("new_forum.numViewing : " + new_forum.numViewing); // Get the description/message of this topic Element el_description = 
el_tds.get(1).select("div.smallfont").first(); if (el_description != null) { new_forum.description = el_description.text(); } else { new_forum.description = ""; } Utilities.debug("new_forum.description : " + new_forum.description); Utilities.debug("new_forum.parent.url : " + new_forum.parent.url); ret.add(new_forum); Utilities.debug("-----"); } Utilities.debug("end parseForums"); return ret; }