List of usage examples for org.jsoup.select.Elements.attr
public String attr(String attributeKey)
From source file:controllers.NWProxy.java
/**
 * Fetches {@code url} and follows HTML meta-refresh redirects until a page without one is reached.
 *
 * <p>A redirect is recognised by a {@code <meta>} element in the document head whose
 * {@code http-equiv} attribute contains "refresh" (case-insensitive). The redirect target is the
 * part of that same element's {@code content} attribute after the first {@code '='}, with single
 * quotes removed, appended to the Newsweek base URL.
 *
 * @param url the address of the page to fetch
 * @return the first fetched document that carries no usable meta-refresh redirect
 * @throws IOException if a page cannot be fetched, or if the redirect chain is longer than the
 *     allowed number of hops
 */
private static org.jsoup.nodes.Document RedirectionHandler(String url) throws IOException {
    final String officialUrl = "http://www.newsweek.com";
    // Upper bound on followed redirects, so two pages refreshing to each other cannot loop forever.
    final int maxRedirects = 10;

    String currentUrl = url;
    for (int hop = 0; hop <= maxRedirects; hop++) {
        org.jsoup.nodes.Document articleDoc = Jsoup.connect(currentUrl).get();

        String target = null;
        Elements metas = articleDoc.select("html head meta[http-equiv]");
        for (org.jsoup.nodes.Element meta : metas) {
            // Read both attributes from the SAME element; Elements.attr(..) would return the first
            // match of each attribute independently, possibly from two unrelated <meta> tags.
            String httpEquiv = meta.attr("http-equiv").toLowerCase(java.util.Locale.ROOT);
            if (!httpEquiv.contains("refresh")) {
                continue;
            }
            String content = meta.attr("content");
            int separator = content.indexOf("=");
            if (separator >= 0) {
                target = content.substring(separator + 1).replaceAll("'", "");
                break;
            }
            // A refresh without "=" (e.g. content="5") only reloads the page: nothing to follow.
        }

        if (target == null || target.isEmpty()) {
            return articleDoc;
        }
        currentUrl = officialUrl + target;
    }
    throw new IOException("Too many meta-refresh redirects (more than " + maxRedirects + ") starting from " + url);
}
From source file:app.data.parse.WebPageUtil.java
public static WebPageInfo parse(String url, Cache<String, WebPageInfo> urlInfoCache) throws IOException { String original = url;//from ww w. j av a2s .co m // hit toutiao.io // fixme http://toutiao.io/shares/640539/url if (original.startsWith("https://toutiao.io/posts/")) { original = original.replace("/posts/", "/k/"); } // check cache WebPageInfo info = urlInfoCache != null ? urlInfoCache.getIfPresent(original) : null; if (info != null) { return info; } else { info = new WebPageInfo(); info.url = original; } // attach url Document doc = requestUrl(info.url); info.url = doc.baseUri(); // or doc.location() // hit gold.xitu.io if (info.url.startsWith("http://gold.xitu.io/entry/")) { Elements origin = doc.select("div[class=ellipsis]"); Elements originLink = origin.select("a[class=share-link]"); info.url = originLink.attr("href"); // reconnect doc = requestUrl(info.url); info.url = doc.baseUri(); // or doc.location() } info.url = smartUri(info.url); // get title Elements metaTitle = doc.select("meta[property=og:title]"); if (metaTitle != null) { info.title = metaTitle.attr("content"); } if (StringUtils.isEmpty(info.title)) { metaTitle = doc.select("meta[property=twitter:title]"); if (metaTitle != null) { info.title = metaTitle.attr("content"); } info.title = StringUtils.isEmpty(info.title) ? 
doc.title() : info.title; } // get desc Elements metaDesc = doc.select("meta[property=og:description]"); if (metaDesc != null) { info.description = metaDesc.attr("content"); } if (StringUtils.isEmpty(info.description)) { metaDesc = doc.select("meta[property=twitter:description]"); if (metaDesc != null) { info.description = metaDesc.attr("content"); } if (StringUtils.isEmpty(info.description)) { metaDesc = doc.select("meta[name=description]"); if (metaDesc != null) { info.description = metaDesc.attr("content"); } if (StringUtils.isEmpty(info.description)) { metaDesc = doc.body().select("p"); if (metaDesc != null) { for (Element element : metaDesc) { info.description = element.text(); if (info.description != null && info.description.length() >= 20) { break; } } } } } } info.description = ellipsis(info.description, 140, "..."); // cache info if (urlInfoCache != null) { urlInfoCache.put(original, info); } return info; }
From source file:com.cbmapi.CbmAPI.java
public static String searchCpuByName(String cpuName) { String encodedName = encodeToUrl(cpuName); Document html = null;// w w w .j a v a2s .c o m String url = null; try { //Connects to zoom's search engine and looks for given cpu from benhmarks section. html = Jsoup.connect("https://www.passmark.com/search/zoomsearch.php?zoom_sort=0&zoom_query=" + encodedName + "&zoom_cat%5B%5D=5").get(); } catch (IOException e) { System.out.println("Connection throws an exception: " + e); } //Regex check is used to validate correct search result. if (html != null) { Elements links = html.select("div.results"); links = links.select("a[href~=^(https?:\\/\\/www.cpubenchmark.net/cpu.php\\?)]"); url = links.attr("href"); if (url.isEmpty()) { return "No results found for: " + cpuName; } } //message for connection issues. else { return "Connection to the search engine failed."; } return url; }
From source file:io.sightly.tck.html.HTMLExtractor.java
/** * Checks if an element matched by the {@code selector} contains or not the attribute {@code attributeName}, * depending on the value of the {@code exists} flag. Additionally, the attribute's value can be checked against {@code attributeValue}. * * @param url the url that identifies the markup * @param markup the markup/* w w w .ja v a 2 s . c o m*/ * @param selector the selector used for retrieval * @param exists flag that defines if the attribute is expected to exist or not * @param attributeName the attribute's name * @param attributeValue the attribute's value * @return {@code true} if the attribute matches the defined conditions, {@code false} otherwise */ public static boolean hasAttribute(String url, String markup, String selector, boolean exists, String attributeName, String attributeValue) { ensureMarkup(url, markup); Document document = documents.get(url); Elements elements = document.select(selector); if (elements.size() > 0) { if (exists) { if (StringUtils.isNotEmpty(attributeValue)) { String value = elements.attr(attributeName); return attributeValue.equals(value); } return true; } else { return elements.hasAttr(attributeName); } } return false; }
From source file:app.sunstreak.yourpisd.net.Parser.java
/**
 * Extracts the gradebook credentials from an HTML page.
 *
 * @param html the markup to parse
 * @return a two-element array: the {@code value} attribute of the elements named {@code userId},
 *     followed by the {@code value} attribute of the elements named {@code password}
 */
public static String[] getGradebookCredentials(String html) {
    Element page = Jsoup.parse(html);
    String userId = page.getElementsByAttributeValue("name", "userId").attr("value");
    String password = page.getElementsByAttributeValue("name", "password").attr("value");
    return new String[] { userId, password };
}
From source file:de.limod.portals.AutoScout.java
/**
 * Reads the {@code href} attribute of the elements that {@link AutoScout#SELECTOR_LINK} matches
 * inside the given result element.
 *
 * @param result the element to search in
 * @return the {@code href} value as returned by {@code Elements.attr}
 */
private String getUrl(Element result) {
    return result.select(AutoScout.SELECTOR_LINK).attr("href");
}
From source file:amazonechoapi.AmazonEchoApi.java
/**
 * Submits the login form found on the page returned by {@code httpGet("")}.
 *
 * <p>The form's {@code action} is used as the POST target. The posted fields are the configured
 * email and password, {@code create=0}, and every hidden input of the page.
 *
 * @return {@code false} when the page has no form action or any exception occurs; {@code true}
 *     once the POST has been executed
 */
public boolean httpLogin() {
    try {
        String output = httpGet("");
        Document doc = Jsoup.parse(output);

        String action = doc.select("form").attr("action");
        if (action.isEmpty()) {
            return false;
        }

        List<NameValuePair> nvps = new ArrayList<>();
        nvps.add(new BasicNameValuePair("email", USERNAME));
        nvps.add(new BasicNameValuePair("password", PASSWORd));
        nvps.add(new BasicNameValuePair("create", "0"));
        // Echo back every hidden field of the page together with the credentials.
        for (Element el1 : doc.select("input[type=hidden]")) {
            nvps.add(new BasicNameValuePair(el1.attr("name"), el1.attr("value")));
        }

        HttpPost httpPost = new HttpPost(action);
        httpPost.setHeader(HttpHeaders.USER_AGENT,
                "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13");
        httpPost.setHeader(HttpHeaders.REFERER, BASE_URL);
        httpPost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

        HttpResponse httpResponse = httpclient.execute(httpPost);
        // Consume the response body so it is fully read before the method returns.
        HttpEntity entity = httpResponse.getEntity();
        if (entity != null) {
            EntityUtils.consume(entity);
        }

        // NOTE(review): success is reported without inspecting the response status - confirm
        // whether a status check is wanted here.
        System.out.println("Login successful");
        return true;
    } catch (Exception e) {
        System.out.println("Login Error:" + e.getMessage());
        return false;
    }
}
From source file:com.stratio.qa.utils.GosecSSOUtils.java
/** * This method provide dcos and sso token to be used to generate client cookie * @return cookieToken list of token generated * @throws Exception exception/*from w w w.j a v a2 s . c o m*/ */ public HashMap<String, String> ssoTokenGenerator() throws Exception { String protocol = "https://"; HashMap<String, String> cookieToken = new HashMap<>(); SSLContext sslContext = SSLContext.getInstance("SSL"); // set up a TrustManager that trusts everything sslContext.init(null, ALL_TRUSTING_TRUST_MANAGER, new SecureRandom()); HttpClientContext context = HttpClientContext.create(); HttpGet httpGet = new HttpGet(protocol + ssoHost + "/login"); HttpClient client = HttpClientBuilder.create().setSslcontext(sslContext) .setRedirectStrategy(new LaxRedirectStrategy()) .setDefaultRequestConfig(RequestConfig.custom().setCircularRedirectsAllowed(true).build()).build(); try { HttpResponse firstResponse = client.execute(httpGet, context); logger.debug(firstResponse.getStatusLine().toString()); Document doc = Jsoup.parse(getStringFromIS(firstResponse.getEntity().getContent())); Elements code = doc.select("[name=lt]"); String loginCode = code.attr("value"); String executionCode = doc.select("[name=execution]").attr("value"); for (Header oneHeader : firstResponse.getAllHeaders()) { logger.debug(oneHeader.getName() + ":" + oneHeader.getValue()); } URI redirect = context.getRedirectLocations().get(context.getRedirectLocations().size() - 1); List<NameValuePair> params = new ArrayList<>(); params.add(new BasicNameValuePair("_eventId", "submit")); params.add(new BasicNameValuePair("submit", "LOGIN")); params.add(new BasicNameValuePair("username", userName)); params.add(new BasicNameValuePair("password", passWord)); params.add(new BasicNameValuePair("lt", loginCode)); params.add(new BasicNameValuePair("execution", executionCode)); HttpPost httpPost = new HttpPost(redirect); httpPost.setEntity(new UrlEncodedFormEntity(params)); HttpResponse secondResponse = client.execute(httpPost, context); for 
(Header oneHeader : secondResponse.getAllHeaders()) { logger.debug(oneHeader.getName() + ":" + oneHeader.getValue()); } HttpGet managementGet = new HttpGet(protocol + ssoHost + managementHost); client.execute(managementGet, context); for (Cookie oneCookie : context.getCookieStore().getCookies()) { logger.debug(oneCookie.getName() + ":" + oneCookie.getValue()); cookieToken.put(oneCookie.getName(), oneCookie.getValue()); } } catch (Exception e) { e.getStackTrace(); } return cookieToken; }
From source file:org.eclipseplugins.impexeditor.core.utils.ImpexHttpClient.java
private String getCSrfToken(final String jSessionid) throws IOException { //<meta name="_csrf" content="c1dee1f7-8c79-43b1-8f3f-767662abc87a" /> final Document doc = Jsoup.connect(hostName).cookie("JSESSIONID", jSessionid).get(); final Elements csrfMetaElt = doc.select("meta[name=_csrf]"); final String csrfToken = csrfMetaElt.attr("content"); return csrfToken; }
From source file:sachin.spider.Page.java
/** * This function is called to get all the meta tags. * * @return Map of all meta tags on the page *///from w w w. ja v a 2s. c om public Map<String, String> getAllMetaTags() { Map<String, String> metaTags = new HashMap<String, String>(); Elements tags = document.select("meta"); metaTags.put(tags.attr("name"), tags.attr("content")); return metaTags; }