Example usage for org.jsoup.select Elements attr

List of usage examples for org.jsoup.select Elements attr

Introduction

On this page you can find example usages of the org.jsoup.select.Elements.attr(String) method.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute value from the first matched element that has the attribute.

Usage

From source file:controllers.NWProxy.java

/**
 * Fetches {@code url} and follows HTML {@code <meta http-equiv="refresh">} redirects.
 *
 * <p>The redirect target is taken from the text after the first {@code '='} in the
 * {@code content} attribute of the same meta element that declares the refresh, with
 * single quotes removed, and is appended to {@code http://www.newsweek.com}.
 *
 * <p>At most ten redirects are followed so that a page refreshing to itself cannot
 * loop forever; after that the last fetched document is returned.
 *
 * @param url the address of the page to load
 * @return the first fetched document that does not declare a usable meta refresh
 * @throws IOException if any of the requests fails
 */
private static org.jsoup.nodes.Document RedirectionHandler(String url) throws IOException {
    final String officialUrl = "http://www.newsweek.com";
    final int maxRedirects = 10;

    org.jsoup.nodes.Document articleDoc = Jsoup.connect(url).get();

    for (int hop = 0; hop < maxRedirects; hop++) {
        String redirectedPath = null;

        // Read http-equiv and content from the SAME element: Elements.attr() returns the value
        // from the first element that has the attribute, which may differ per attribute.
        for (org.jsoup.nodes.Element meta : articleDoc.select("html head meta[http-equiv]")) {
            String httpEquiv = meta.attr("http-equiv").toLowerCase(java.util.Locale.ROOT);
            if (!httpEquiv.contains("refresh")) {
                continue;
            }
            String content = meta.attr("content");
            int separator = content.indexOf('=');
            if (separator >= 0) {
                redirectedPath = content.substring(separator + 1).replace("'", "").trim();
            }
            break;
        }

        // No refresh tag, or a refresh without a target (e.g. content="5"): nothing to follow.
        if (redirectedPath == null || redirectedPath.isEmpty()) {
            return articleDoc;
        }

        articleDoc = Jsoup.connect(officialUrl + redirectedPath).get();
    }

    return articleDoc;
}

From source file:app.data.parse.WebPageUtil.java

/**
 * Collects the final URL, title and a short description of a web page.
 *
 * <p>Title and description are taken from Open Graph / Twitter meta tags when present,
 * falling back to the document title, the {@code description} meta tag and finally the
 * page's paragraphs. The description is passed through {@code ellipsis(..., 140, "...")}.
 *
 * @param url          the page address; {@code https://toutiao.io/posts/} links are rewritten
 *                     to their {@code /k/} form before use
 * @param urlInfoCache optional cache keyed by the (rewritten) input URL; may be {@code null}
 * @return the cached entry if one exists, otherwise a newly built {@link WebPageInfo}
 * @throws IOException declared for the page requests made through {@code requestUrl}
 */
public static WebPageInfo parse(String url, Cache<String, WebPageInfo> urlInfoCache) throws IOException {
    String original = url;

    // hit toutiao.io
    // fixme http://toutiao.io/shares/640539/url
    if (original.startsWith("https://toutiao.io/posts/")) {
        original = original.replace("/posts/", "/k/");
    }

    // check cache
    if (urlInfoCache != null) {
        WebPageInfo cached = urlInfoCache.getIfPresent(original);
        if (cached != null) {
            return cached;
        }
    }

    WebPageInfo info = new WebPageInfo();
    info.url = original;

    // attach url
    Document doc = requestUrl(info.url);
    info.url = doc.baseUri(); // or doc.location()

    // hit gold.xitu.io: the entry page only links to the real article
    if (info.url.startsWith("http://gold.xitu.io/entry/")) {
        String shareLink = doc.select("div[class=ellipsis]").select("a[class=share-link]").attr("href");
        // attr() yields "" when no share link is found; keep the entry page instead of requesting "".
        if (!StringUtils.isEmpty(shareLink)) {
            doc = requestUrl(shareLink);
            info.url = doc.baseUri(); // or doc.location()
        }
    }

    info.url = smartUri(info.url);

    // get title
    String title = firstMetaContent(doc, "meta[property=og:title]", "meta[property=twitter:title]");
    info.title = StringUtils.isEmpty(title) ? doc.title() : title;

    // get desc
    String description = firstMetaContent(doc, "meta[property=og:description]",
            "meta[property=twitter:description]", "meta[name=description]");
    if (StringUtils.isEmpty(description)) {
        Element body = doc.body();
        if (body != null) {
            // Use the first paragraph with at least 20 characters; if none is that long,
            // the text of the last paragraph is kept.
            for (Element paragraph : body.select("p")) {
                description = paragraph.text();
                if (description != null && description.length() >= 20) {
                    break;
                }
            }
        }
    }
    info.description = ellipsis(description, 140, "...");

    // cache info
    if (urlInfoCache != null) {
        urlInfoCache.put(original, info);
    }
    return info;
}

/** Returns the first non-empty {@code content} attribute among the given meta selectors, or "". */
private static String firstMetaContent(Document doc, String... selectors) {
    for (String selector : selectors) {
        String content = doc.select(selector).attr("content");
        if (!StringUtils.isEmpty(content)) {
            return content;
        }
    }
    return "";
}

From source file:com.cbmapi.CbmAPI.java

/**
 * Looks up a CPU on the PassMark search page and returns the link to its benchmark page.
 *
 * @param cpuName the CPU name to search for
 * @return the {@code href} of the first result link pointing at {@code cpubenchmark.net/cpu.php},
 *         or a human-readable message when the connection failed or nothing matched
 */
public static String searchCpuByName(String cpuName) {
    final String query = encodeToUrl(cpuName);

    Document searchPage = null;
    try {
        // Query the zoom search engine, restricted to the benchmarks category.
        searchPage = Jsoup.connect("https://www.passmark.com/search/zoomsearch.php?zoom_sort=0&zoom_query="
                + query + "&zoom_cat%5B%5D=5").get();
    } catch (IOException e) {
        System.out.println("Connection throws an exception: " + e);
    }

    // Message for connection issues.
    if (searchPage == null) {
        return "Connection to the search engine failed.";
    }

    // The regex keeps only result links that point at a CPU benchmark page.
    final String benchmarkUrl = searchPage.select("div.results")
            .select("a[href~=^(https?:\\/\\/www.cpubenchmark.net/cpu.php\\?)]")
            .attr("href");

    return benchmarkUrl.isEmpty() ? "No results found for: " + cpuName : benchmarkUrl;
}

From source file:io.sightly.tck.html.HTMLExtractor.java

/**
 * Checks if an element matched by the {@code selector} contains or not the attribute {@code attributeName},
 * depending on the value of the {@code exists} flag. Additionally, the attribute's value can be checked against {@code attributeValue}.
 *
 * @param url            the url that identifies the markup
 * @param markup         the markup/*  w  w w .ja  v a 2  s  .  c o  m*/
 * @param selector       the selector used for retrieval
 * @param exists         flag that defines if the attribute is expected to exist or not
 * @param attributeName  the attribute's name
 * @param attributeValue the attribute's value
 * @return {@code true} if the attribute matches the defined conditions, {@code false} otherwise
 */
public static boolean hasAttribute(String url, String markup, String selector, boolean exists,
        String attributeName, String attributeValue) {
    ensureMarkup(url, markup);
    Document document = documents.get(url);
    Elements elements = document.select(selector);
    if (elements.size() > 0) {
        if (exists) {
            if (StringUtils.isNotEmpty(attributeValue)) {
                String value = elements.attr(attributeName);
                return attributeValue.equals(value);
            }
            return true;
        } else {
            return elements.hasAttr(attributeName);
        }
    }
    return false;
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Extracts the gradebook credentials embedded in an HTML page.
 *
 * @param html the markup to parse
 * @return a two-element array: the {@code value} of the first element named {@code userId},
 *         followed by the {@code value} of the first element named {@code password}
 *         (an empty string where no such value is found)
 */
public static String[] getGradebookCredentials(String html) {
    final Element page = Jsoup.parse(html);
    final String userId = page.getElementsByAttributeValue("name", "userId").attr("value");
    final String password = page.getElementsByAttributeValue("name", "password").attr("value");
    return new String[] { userId, password };
}

From source file:de.limod.portals.AutoScout.java

/**
 * Reads the link target of a search result.
 *
 * @param result the result element to inspect
 * @return the {@code href} of the first element matching {@code AutoScout.SELECTOR_LINK}
 *         that has one, or an empty string if there is none
 */
private String getUrl(Element result) {
    return result.select(AutoScout.SELECTOR_LINK).attr("href");
}

From source file:amazonechoapi.AmazonEchoApi.java

/**
 * Submits the login form found on the page returned by {@code httpGet("")}.
 *
 * <p>The POST carries the configured e-mail and password, {@code create=0} and every hidden
 * input of the page, and is sent to the form's {@code action} URL.
 *
 * @return {@code false} if the page has no form action or any exception occurs,
 *         {@code true} once the POST has been executed
 */
public boolean httpLogin() {
    try {
        final Document loginPage = Jsoup.parse(httpGet(""));

        final String formAction = loginPage.select("form").attr("action");
        if (formAction.isEmpty()) {
            return false;
        }

        final List<NameValuePair> formFields = new ArrayList<>();
        formFields.add(new BasicNameValuePair("email", USERNAME));
        formFields.add(new BasicNameValuePair("password", PASSWORd));
        formFields.add(new BasicNameValuePair("create", "0"));
        // Hidden inputs are echoed back unchanged alongside the credentials.
        for (Element hiddenInput : loginPage.select("input[type=hidden]")) {
            formFields.add(new BasicNameValuePair(hiddenInput.attr("name"), hiddenInput.attr("value")));
        }

        final HttpPost loginRequest = new HttpPost(formAction);
        loginRequest.setHeader(HttpHeaders.USER_AGENT,
                "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13");
        loginRequest.setHeader(HttpHeaders.REFERER, BASE_URL);
        loginRequest.setEntity(new UrlEncodedFormEntity(formFields, HTTP.UTF_8));

        final HttpResponse loginResponse = httpclient.execute(loginRequest);

        // Drain the response body so the underlying connection can be reused.
        final HttpEntity responseBody = loginResponse.getEntity();
        if (responseBody != null) {
            EntityUtils.consume(responseBody);
        }

        System.out.println("Login successful");
        return true;
    } catch (Exception e) {
        System.out.println("Login Error:" + e.getMessage());
        return false;
    }
}

From source file:com.stratio.qa.utils.GosecSSOUtils.java

/**
 * This method provide dcos and sso token to be used to generate client cookie
 * @return cookieToken list of token generated
 * @throws Exception exception/*from   w  w  w.j a v a2 s  .  c o  m*/
 */
public HashMap<String, String> ssoTokenGenerator() throws Exception {
    String protocol = "https://";
    HashMap<String, String> cookieToken = new HashMap<>();

    SSLContext sslContext = SSLContext.getInstance("SSL");
    // set up a TrustManager that trusts everything
    sslContext.init(null, ALL_TRUSTING_TRUST_MANAGER, new SecureRandom());
    HttpClientContext context = HttpClientContext.create();
    HttpGet httpGet = new HttpGet(protocol + ssoHost + "/login");
    HttpClient client = HttpClientBuilder.create().setSslcontext(sslContext)
            .setRedirectStrategy(new LaxRedirectStrategy())
            .setDefaultRequestConfig(RequestConfig.custom().setCircularRedirectsAllowed(true).build()).build();
    try {
        HttpResponse firstResponse = client.execute(httpGet, context);

        logger.debug(firstResponse.getStatusLine().toString());
        Document doc = Jsoup.parse(getStringFromIS(firstResponse.getEntity().getContent()));
        Elements code = doc.select("[name=lt]");
        String loginCode = code.attr("value");
        String executionCode = doc.select("[name=execution]").attr("value");
        for (Header oneHeader : firstResponse.getAllHeaders()) {
            logger.debug(oneHeader.getName() + ":" + oneHeader.getValue());
        }

        URI redirect = context.getRedirectLocations().get(context.getRedirectLocations().size() - 1);

        List<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair("_eventId", "submit"));
        params.add(new BasicNameValuePair("submit", "LOGIN"));
        params.add(new BasicNameValuePair("username", userName));
        params.add(new BasicNameValuePair("password", passWord));
        params.add(new BasicNameValuePair("lt", loginCode));
        params.add(new BasicNameValuePair("execution", executionCode));
        HttpPost httpPost = new HttpPost(redirect);
        httpPost.setEntity(new UrlEncodedFormEntity(params));
        HttpResponse secondResponse = client.execute(httpPost, context);

        for (Header oneHeader : secondResponse.getAllHeaders()) {
            logger.debug(oneHeader.getName() + ":" + oneHeader.getValue());
        }

        HttpGet managementGet = new HttpGet(protocol + ssoHost + managementHost);
        client.execute(managementGet, context);

        for (Cookie oneCookie : context.getCookieStore().getCookies()) {
            logger.debug(oneCookie.getName() + ":" + oneCookie.getValue());
            cookieToken.put(oneCookie.getName(), oneCookie.getValue());
        }

    } catch (Exception e) {
        e.getStackTrace();
    }
    return cookieToken;
}

From source file:org.eclipseplugins.impexeditor.core.utils.ImpexHttpClient.java

/**
 * Loads the page at {@code hostName} with the given session cookie and reads its CSRF token.
 *
 * <p>The token is expected in a tag such as
 * {@code <meta name="_csrf" content="c1dee1f7-8c79-43b1-8f3f-767662abc87a" />}.
 *
 * @param jSessionid value sent as the {@code JSESSIONID} cookie
 * @return the {@code content} of the {@code _csrf} meta tag, or an empty string if it is missing
 * @throws IOException if the page cannot be fetched
 */
private String getCSrfToken(final String jSessionid) throws IOException {
    final Document page = Jsoup.connect(hostName).cookie("JSESSIONID", jSessionid).get();
    return page.select("meta[name=_csrf]").attr("content");
}

From source file:sachin.spider.Page.java

/**
 * This function is called to get all the meta tags.
 *
 * <p>Only {@code <meta>} elements with a non-empty {@code name} attribute are included;
 * each one is mapped to its own {@code content} value. When a name occurs more than once,
 * the first occurrence wins.
 *
 * @return Map of all named meta tags on the page (name to content); empty if there are none
 */
public Map<String, String> getAllMetaTags() {
    Map<String, String> metaTags = new HashMap<String, String>();
    // Read name and content per element: Elements.attr() would only return the value from the
    // first element that has each attribute, which may not even be the same element for both.
    for (org.jsoup.nodes.Element tag : document.select("meta[name]")) {
        String name = tag.attr("name");
        if (!name.isEmpty() && !metaTags.containsKey(name)) {
            metaTags.put(name, tag.attr("content"));
        }
    }
    return metaTags;
}