Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes.Element.attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:org.keycloak.testsuite.util.saml.UpdateProfileBuilder.java

/**
 * Builds the HTTP request that submits the first {@code <form>} found in the given page.
 *
 * <p>Every {@code <input>} whose {@code name} is a key of {@code this.parameters} contributes a
 * name/value pair. A form whose {@code method} is "post" (case-insensitive) yields an
 * {@link HttpPost} with a UTF-8 URL-encoded body; any other method yields an {@link HttpGet}
 * with the pairs appended as query parameters. Both branches return, so only the first form
 * is ever processed.
 *
 * @param loginPage  HTML of the page to parse
 * @param currentURI not read by this method
 * @return the request targeting the form's {@code action}
 * @throws IllegalArgumentException if the page contains no {@code <form>} element
 */
public HttpUriRequest handleUpdateProfile(String loginPage, URI currentURI) {
    org.jsoup.nodes.Document page = Jsoup.parse(loginPage);
    Set<String> unusedParams = new HashSet<>(this.parameters.keySet());
    List<NameValuePair> formParameters = new LinkedList<>();

    for (Element form : page.getElementsByTag("form")) {
        // equalsIgnoreCase(null) is false, so no separate null check is needed.
        boolean isPost = "post".equalsIgnoreCase(form.attr("method"));
        String action = form.attr("action");

        for (Element input : form.getElementsByTag("input")) {
            String name = input.attr("name");
            if (!this.parameters.containsKey(name)) {
                continue;
            }
            formParameters.add(new BasicNameValuePair(name, this.parameters.get(name)));
            unusedParams.remove(name);
        }

        if (!unusedParams.isEmpty()) {
            LOG.warnf("Unused parameter names at Update Profile page: %s", unusedParams);
        }

        if (!isPost) {
            UriBuilder query = UriBuilder.fromPath(action);
            for (NameValuePair pair : formParameters) {
                query.queryParam(pair.getName(), pair.getValue());
            }
            return new HttpGet(query.build());
        }

        HttpPost post = new HttpPost(action);
        try {
            post.setEntity(new UrlEncodedFormEntity(formParameters, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return post;
    }

    throw new IllegalArgumentException("Invalid update profile form: " + loginPage);
}

From source file:com.github.brandtg.pantopod.crawler.CrawlingEventHandler.java

/**
 * Fetches the URL of the given crawl event and derives follow-up events from the links found.
 *
 * <p>The page is requested unless {@code checkErrors} is set and {@code hasError(url)} reports
 * an error. A 200 response is passed to {@code handleData} and parsed; any other status is
 * logged and recorded through {@code markError}. Links are extracted only when a document was
 * parsed and either {@code handleData} returned {@code true} or {@code traverseDuplicates}
 * is set.
 *
 * @param event the crawl event to process
 * @return the events for links that should be explored next; possibly empty
 * @throws Exception if the request or any helper fails
 */
@Override
public Set<CrawlEvent> handle(CrawlEvent event) throws Exception {
    Set<CrawlEvent> discovered = new HashSet<>();

    URI url = URI.create(event.getUrl());
    Document dom = null;
    boolean created = false;

    if (!(checkErrors && hasError(url))) {
        HttpResponse res = httpClient.execute(new HttpGet(url));
        try {
            int status = res.getStatusLine().getStatusCode();
            if (status == 200) {
                byte[] body = IOUtils.toByteArray(res.getEntity().getContent());
                created = handleData(url, body);
                // NOTE(review): decodes with the platform default charset - confirm this is intended.
                dom = Jsoup.parse(new String(body));
            } else {
                LOG.error("Error for {} #=> {}", url, status);
                markError(url, status);
            }
        } finally {
            // Always drain the entity so the underlying connection can be released.
            if (res.getEntity() != null) {
                EntityUtils.consumeQuietly(res.getEntity());
            }
        }
    }

    // Nothing to extract without a parsed document, or when duplicates are not traversed.
    if (dom == null || !(created || traverseDuplicates)) {
        return discovered;
    }

    for (Element anchor : dom.select("a")) {
        String href = anchor.attr("href");
        if (href == null) {
            continue;
        }

        URI nextUri = getNextUri(url, href, event.getChroot());
        boolean follow = shouldExplore(nextUri) && isSameDomain(url, nextUri) && isDifferentPage(url, nextUri);
        if (!follow) {
            LOG.debug("Skipping {}", nextUri);
            continue;
        }

        CrawlEvent next = new CrawlEvent(event);
        next.setUrl(nextUri.toString());
        next.setParentUrl(event.getUrl());
        next.setDepth(event.getDepth() + 1);
        discovered.add(next);
        LOG.debug("Exploring {}", nextUri);
    }

    return discovered;
}

From source file:com.johan.vertretungsplan.parser.UntisSubstitutionParser.java

/**
 * Logs in, loads the overview page at {@code baseUrl} and builds a substitution plan with a
 * single day from the class pages it links to.
 *
 * <p>Each {@code td a} link on the overview page is resolved against the directory of
 * {@code baseUrl} and fetched. The day's date is taken from the first class page whose
 * {@code font[size=5]} text matches the day pattern, and the first {@code table[rules=all]}
 * of every class page is handed to {@code parseVertretungsplanTable}.
 *
 * @return the plan containing one day
 * @throws IOException   if a page cannot be fetched
 * @throws JSONException if the school data has no "encoding" entry
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    String encoding = schule.getData().getString("encoding");
    Document overview = Jsoup.parse(this.httpGet(baseUrl, encoding));
    Elements classLinks = overview.select("td a");

    // The plan holds exactly one day, which every class page contributes to.
    Vertretungsplan plan = new Vertretungsplan();
    VertretungsplanTag day = new VertretungsplanTag();
    List<VertretungsplanTag> days = new ArrayList<VertretungsplanTag>();
    days.add(day);
    plan.setTage(days);

    day.setStand(overview.select("td[align=right]:not(:has(b))").text());

    Pattern dayPattern = Pattern.compile("\\d\\d?.\\d\\d?. / \\w+");

    for (Element classLink : classLinks) {
        String classUrl = baseUrl.substring(0, baseUrl.lastIndexOf("/")) + "/" + classLink.attr("href");
        Document classDoc = Jsoup.parse(httpGet(classUrl, encoding));

        // Only the first page with a matching title determines the date.
        if (day.getDatum() == null) {
            Matcher dateMatcher = dayPattern.matcher(classDoc.select("font[size=5]").text());
            if (dateMatcher.find()) {
                day.setDatum(dateMatcher.group());
            }
        }

        Element substitutionTable = classDoc.select("table[rules=all]").first();
        parseVertretungsplanTable(substitutionTable, data, day);
    }
    return plan;
}

From source file:com.clonephpscrapper.crawler.ClonePhpScrapper.java

/**
 * Scrapes the category links from the clonephp.com start page, stores each category through
 * {@code objClonePhpDaoImpl}, and then crawls every stored category on a pool of five threads.
 *
 * <p>The result of each crawl task is printed once it completes. The executor is shut down in
 * a {@code finally} block so its threads are released even if a step fails. If the calling
 * thread is interrupted while waiting for a result, the interrupt flag is restored and waiting
 * stops.
 *
 * @throws URISyntaxException   declared for callers; not thrown by the visible code
 * @throws IOException          if the start page cannot be fetched
 * @throws InterruptedException declared for callers; interruption while waiting is handled here
 * @throws Exception            if any helper fails
 */
public void crawledCategories() throws URISyntaxException, IOException, InterruptedException, Exception {

    String url = "http://clonephp.com/";

    String response = new GetRequestHandler().doGetRequest(new URL(url));

    Document doc = Jsoup.parse(response);

    Elements ele = doc.select("table[class=dir] tbody tr td table[class=dir_cat] tbody tr th a");

    for (Element ele1 : ele) {
        objCategories = new Categories();

        String categoryName = ele1.text();
        String categoryUrl = url + ele1.attr("href");

        System.out.println("CATEGORY_NAME : " + categoryName);
        System.out.println("CATEGORY_URL  : " + categoryUrl);

        objCategories.setCategoryName(categoryName);
        objCategories.setCategoryUrl(categoryUrl);

        objClonePhpDaoImpl.insertCategoriesData(objCategories);
    }

    List<Future<String>> list = new ArrayList<Future<String>>();
    ExecutorService executor = Executors.newFixedThreadPool(5);
    try {
        List<Categories> listCatogories = objClonePhpDaoImpl.getCategoriesDataList();

        for (Categories listCatogory : listCatogories) {
            try {
                // NOTE(review): raw Callable kept because the type argument of
                // CrawlingEachUrlData is not visible here - confirm and parameterize.
                Callable worker = new CrawlingEachUrlData(listCatogory, objClonePhpDaoImpl);
                Future<String> future = executor.submit(worker);
                list.add(future);
            } catch (Exception exx) {
                System.out.println(exx);
            }
        }

        for (Future<String> fut : list) {
            try {
                // Future.get() blocks until the task completes, hence the delayed output.
                System.out.println(new Date() + "::" + fut.get());
            } catch (InterruptedException ep) {
                ep.printStackTrace();
                // Restore the flag for callers and stop waiting: with the flag set,
                // every further get() would fail immediately.
                Thread.currentThread().interrupt();
                break;
            } catch (ExecutionException ep) {
                ep.printStackTrace();
            }
        }
    } finally {
        // Always release the pool threads, even when a step above throws.
        executor.shutdown();
    }

}

From source file:com.vaushell.superpipes.tools.http.ImageExtractor.java

/**
 * Return the biggest image referenced by this webpage.
 *
 * <p>The page is fetched and parsed, every non-empty {@code img[src]} is resolved against
 * {@code rootURI}, and all images are downloaded concurrently. "Biggest" means the largest
 * pixel area (width times height). Images that fail to load are skipped.
 *
 * @param rootURI Webpage URI
 * @return Biggest image, or {@code null} if no image could be loaded
 * @throws IOException if the page request does not return status 200 or reading it fails
 */
public BufferedImage extractBiggest(final URI rootURI) throws IOException {
    final List<URI> imagesURIs = new ArrayList<>();
    HttpEntity responseEntity = null;
    try {
        // Exec request
        final HttpGet get = new HttpGet(rootURI);

        try (final CloseableHttpResponse response = client.execute(get)) {
            final StatusLine sl = response.getStatusLine();
            if (sl.getStatusCode() != 200) {
                throw new IOException(sl.getReasonPhrase());
            }

            responseEntity = response.getEntity();

            try (final InputStream is = responseEntity.getContent()) {
                final Document doc = Jsoup.parse(is, "UTF-8", rootURI.toString());

                final Elements elts = doc.select("img");
                if (elts != null) {
                    for (final Element elt : elts) {
                        final String src = elt.attr("src");
                        if (src != null && !src.isEmpty()) {
                            try {
                                imagesURIs.add(rootURI.resolve(src));
                            } catch (final IllegalArgumentException ex) {
                                // Ignore wrong encoded URI
                            }
                        }
                    }
                }
            }
        }
    } finally {
        if (responseEntity != null) {
            EntityUtils.consume(responseEntity);
        }
    }

    // Each worker writes only its own slot, so the array needs no further coordination.
    final BufferedImage[] images = new BufferedImage[imagesURIs.size()];
    final ExecutorService service = Executors.newCachedThreadPool();
    for (int i = 0; i < imagesURIs.size(); ++i) {
        final int num = i;

        service.execute(new Runnable() {
            @Override
            public void run() {
                try {
                    images[num] = HTTPhelper.loadPicture(client, imagesURIs.get(num));
                } catch (final IOException ex) {
                    // A picture that cannot be loaded is simply not a candidate.
                    images[num] = null;
                }
            }
        });
    }

    service.shutdown();

    try {
        service.awaitTermination(1L, TimeUnit.DAYS);
    } catch (final InterruptedException ex) {
        // Keep the best-effort result, but restore the flag so callers can see the interruption.
        Thread.currentThread().interrupt();
    }

    BufferedImage biggest = null;
    // Area is computed as long: width * height can overflow int for very large images.
    long biggestSize = Long.MIN_VALUE;
    for (int i = 0; i < imagesURIs.size(); ++i) {
        if (images[i] != null) {
            final long actualSize = (long) images[i].getWidth() * images[i].getHeight();
            if (actualSize > biggestSize) {
                biggest = images[i];

                biggestSize = actualSize;
            }
        }
    }

    return biggest;
}

From source file:com.romeikat.datamessie.core.processing.task.documentProcessing.redirecting.DocumentRedirector.java

/**
 * Applies the hard-coded redirecting rule: when the document title equals "advertisment"
 * (case-insensitive), the most frequently occurring link target of the document is returned.
 *
 * <p>If several targets share the highest count, the one whose lower-cased URL sorts first
 * wins.
 *
 * @param rawContent raw content whose HTML is inspected
 * @return the most frequent link target, or {@code null} if the title does not match or the
 *         document has no {@code a[href]} links
 */
private String applyHardCodedRedirectingRule(final RawContent rawContent) {
    final org.jsoup.nodes.Document parsed = Jsoup.parse(rawContent.getContent());

    // The rule only applies to documents with this exact title.
    if (!parsed.title().equalsIgnoreCase("advertisment")) {
        return null;
    }

    // Link target -> number of occurrences
    final Map<String, Integer> occurrences = new HashMap<String, Integer>();
    for (final Element anchor : parsed.select("a[href]")) {
        final String target = anchor.attr("href");
        final Integer seen = occurrences.get(target);
        occurrences.put(target, seen == null ? 1 : seen + 1);
    }

    String bestTarget = null;
    int bestCount = 0;
    for (final Map.Entry<String, Integer> entry : occurrences.entrySet()) {
        final String target = entry.getKey();
        final int count = entry.getValue();

        // Every count is at least 1, so the first entry always wins on count and
        // bestTarget is non-null whenever the tie branch is evaluated.
        final boolean wins = count > bestCount
                || (count == bestCount && target.toLowerCase().compareTo(bestTarget.toLowerCase()) < 0);
        if (wins) {
            bestTarget = target;
            bestCount = count;
        }
    }

    // Still null when the document contained no links, i.e. no redirecting.
    return bestTarget;
}

From source file:org.commonjava.indy.ftest.core.urls.StoreOneAndVerifyInHtmlListingTest.java

/**
 * Stores one file in a hosted store, requests the HTML listing of its parent directory and
 * verifies that every {@code a.item-link} entry points at the content URL of the listed file.
 *
 * <p>The HTTP client and response are closed as soon as the listing has been read.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes();
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";

    client.content().store(hosted, STORE, path, stream);

    final IndyClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(client.content().contentUrl(hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    final String html;
    // try-with-resources releases the connection even when reading the listing fails.
    try (CloseableHttpClient hc = http.newClient();
            CloseableHttpResponse response = hc.execute(request)) {
        final InputStream listing = response.getEntity().getContent();
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.item-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        final String expected = client.content().contentUrl(hosted, STORE, root, fname);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}

From source file:org.commonjava.aprox.folo.ftest.urls.StoreOneAndVerifyInHtmlListingTest.java

/**
 * Stores one file through the tracked content API, requests the HTML listing of its parent
 * directory and verifies the link of every {@code a.item-link} entry.
 *
 * <p>The HTTP client and response are closed as soon as the listing has been read.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes();
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";
    final String track = "track";

    content.store(track, hosted, STORE, path, stream);

    final AproxClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(content.contentUrl(track, hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    final String html;
    // try-with-resources releases the connection even when reading the listing fails.
    try (CloseableHttpClient hc = http.newClient();
            CloseableHttpResponse response = hc.execute(request)) {
        final InputStream listing = response.getEntity().getContent();
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.item-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        // NOTE(review): the expected URL is built without the tracking id although the
        // listing was requested through the tracked URL - confirm this is intended.
        final String expected = client.content().contentUrl(hosted, STORE, root, fname);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}

From source file:org.commonjava.indy.folo.ftest.urls.StoreOneAndVerifyInHtmlListingTest.java

/**
 * Stores one file under a tracking id, requests the HTML listing of its parent directory and
 * verifies the link of every {@code a.item-link} entry.
 *
 * <p>The HTTP client and response are closed as soon as the listing has been read.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes();
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";
    final String track = "track";

    content.store(track, hosted, STORE, path, stream);

    final IndyClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(content.contentUrl(track, hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    final String html;
    // try-with-resources releases the connection even when reading the listing fails.
    try (CloseableHttpClient hc = http.newClient();
            CloseableHttpResponse response = hc.execute(request)) {
        final InputStream listing = response.getEntity().getContent();
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.item-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        // NOTE(review): the expected URL is built without the tracking id although the
        // listing was requested through the tracked URL - confirm this is intended.
        final String expected = client.content().contentUrl(hosted, STORE, root, fname);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}

From source file:org.commonjava.indy.ftest.core.urls.StoreOneAndSourceStoreUrlInHtmlListingTest.java

/**
 * Stores one file in a hosted store, requests the HTML listing of its parent directory and
 * verifies that every {@code a.source-link} entry points at the content URL of the store.
 *
 * <p>The HTTP client and response are closed as soon as the listing has been read.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes();
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";

    client.content().store(hosted, STORE, path, stream);

    final IndyClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(client.content().contentUrl(hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    final String html;
    // try-with-resources releases the connection even when reading the listing fails.
    try (CloseableHttpClient hc = http.newClient();
            CloseableHttpResponse response = hc.execute(request)) {
        final InputStream listing = response.getEntity().getContent();
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.source-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        // Source links are expected to point at the store itself, not at a file in it.
        final String expected = client.content().contentUrl(hosted, STORE);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}