Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:feedzilla.Feed.java

/**
 * Downloads the FeedZilla page behind {@code link}, takes the real news URL from the
 * page's iframe, crawls that news item and writes its XML form to {@code newsXMLFile}.
 *
 * <p>The method starts with a random delay of up to one minute. The page download is
 * attempted at most five times; after the fifth failure the news item is given up.
 * Every failure is logged and ends the method early; nothing is thrown to the caller.
 */
@Override
public void run() {
    try {
        // Random start delay of up to 60 seconds before the first request.
        Thread.sleep((new Random()).nextInt(60 * 1000));
    } catch (InterruptedException ex) {
        Log.warn("Could not sleep Thread", ex);
        // Interruption is a cancellation request: restore the flag for the owner of this
        // thread and stop instead of silently continuing with network work.
        Thread.currentThread().interrupt();
        return;
    }

    // The fatal message below promises "five attempts", so cap the retries at exactly five.
    final int maxAttempts = 5;
    Document doc = null;
    for (int attempt = 1; doc == null; attempt++) {
        try {
            doc = Jsoup.connect(this.link).timeout(60 * 1000).userAgent(
                    "Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6")
                    .referrer("http://www.google.com").get();
        } catch (IOException ex) {
            Logger.getLogger(Feed.class.getName()).log(Level.SEVERE, null, ex);
            Log.warn("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName()
                    + " - Could not get Feed page from FeedZilla", ex);
            if (attempt >= maxAttempts) {
                Log.fatal("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName() + " - "
                        + "Five attempts and has not yet been possible to "
                        + "retrieve the page from FeedZilla. Ignoring this news.");
                return;
            }
        }
    }

    // Every iframe overwrites the link, so the last iframe on the page wins.
    Elements elements = doc.body().select("iframe");
    for (Element element : elements) {
        try {
            this.link = URLDecoder.decode(element.attr("src"), "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(Feed.class.getName()).log(Level.SEVERE, null, ex);
            Log.fatal("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName() + " - "
                    + "Could not get the news link from FeedZilla pages");
            return;
        }
    }
    this.link = getUrlInParams(this.link);
    try {
        this.news = (new NewsCrawler(this.link)).getNews();
    } catch (Exception ex) {
        Log.fatal("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName() + " - "
                + "Could not retrieve news from link " + this.link, ex);
        return;
    }

    newsXMLFile.getParentFile().mkdirs();
    try {
        FileUtils.writeStringToFile(newsXMLFile, this.toXML());
        Log.info("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName()
                + " - Successfuly saved!");
        System.out.println("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName()
                + " - Successfuly saved!");
    } catch (IOException ex) {
        Log.error("News " + this.category + "/" + this.subcategory + "/" + newsXMLFile.getName()
                + " - Could not save news into file", ex);
    }
}

From source file:com.megatome.j2d.support.JavadocSupport.java

/**
 * Builds search index entries from the links found in a single Javadoc HTML file.
 *
 * <p>A link is indexed only when it is the first child of its parent and the parent's
 * tag name contains "dt" (case-insensitive). Links whose text/class combination cannot
 * be mapped to a {@code MatchType} are reported on {@code System.err} and skipped.
 *
 * @param f the HTML file to scan
 * @return the index entries found in the file; possibly empty
 * @throws BuilderException if a link target cannot be URL-decoded
 */
private static List<SearchIndexValue> indexFile(File f) throws BuilderException {
    final List<SearchIndexValue> indexed = new ArrayList<>();
    for (final Element link : loadAndFindLinks(f)) {
        final Element directParent = link.parent();
        if (!directParent.child(0).equals(link)) {
            continue;
        }

        final String parentTagName = directParent.tagName();
        Element container = directParent;
        if (parentPattern.matcher(parentTagName).matches()) {
            // The link sits inside a wrapper tag: step up one level, but only when the
            // wrapper itself is the first child of its own parent.
            container = directParent.parent();
            if (!container.child(0).equals(directParent)) {
                continue;
            }
        }
        // NOTE(review): parentTagName still refers to the direct parent even after the
        // step-up above -- confirm that checking the original tag here is intended.
        if (!containsIgnoreCase(parentTagName, "dt")) {
            continue;
        }

        final String containerText = container.text();
        final String linkName = link.text();
        final String containerClass = container.className();

        final MatchType matchType = getMatchingType(containerText, containerClass);
        if (matchType == null) {
            System.err.println(String.format(
                    "Unknown type found. Please submit a bug report. (Text: %s, Name: %s, className: %s)",
                    containerText, linkName, containerClass));
            continue;
        }

        try {
            final String decodedHref = URLDecoder.decode(link.attr("href"), "UTF-8");
            indexed.add(new SearchIndexValue(linkName, matchType, decodedHref));
        } catch (UnsupportedEncodingException ex) {
            throw new BuilderException("Error decoding a link", ex);
        }
    }
    return indexed;
}

From source file:org.commonjava.maven.galley.transport.htcli.internal.HttpListing.java

/**
 * Retrieves the HTML directory listing at {@code url} and extracts the names of the
 * entries it links to.
 *
 * <p>A link is accepted when it points to the same server and to a sub-path of
 * {@code url}, its href ends with its own link text (optionally followed by '/'), and
 * the text is not listed in {@code EXCLUDES}. While the call runs, the current thread
 * is renamed to include the URL; the original name is restored afterwards.
 *
 * @return the listing, or {@code null} when the request did not succeed or an error
 *         was recorded in {@code error}
 */
@Override
public ListingResult call() {
    request = new HttpGet(url);

    // Links in the listing are extracted with jsoup selectors, see
    // http://jsoup.org/cookbook/extracting-data/selector-syntax
    // (dependency: org.jsoup:jsoup:1.7.2).

    ListingResult result = null;
    InputStream in = null;

    String oldName = Thread.currentThread().getName();
    try {
        String newName = oldName + ": LIST " + url;
        Thread.currentThread().setName(newName);

        if (executeHttp()) {
            in = response.getEntity().getContent();
            // TODO: Charset!! The body is decoded with the platform default charset.
            String listing = IOUtils.toString(in);
            Logger logger = LoggerFactory.getLogger(getClass());
            logger.debug("Got raw listing content:\n\n{}\n\n", listing);

            final ArrayList<String> al = new ArrayList<>();

            // Jsoup.parse never returns null, so no null check is needed on the document.
            Document doc = Jsoup.parse(listing, url);

            // Parsed once for all links; it does not change between iterations.
            final URL baseUrl = new URL(url);

            for (final Element link : doc.select("a")) {
                String linkText = link.text();
                String linkHref = link.attr("href");

                boolean sameServer = isSameServer(baseUrl, linkHref);
                boolean subpath = isSubpath(baseUrl, linkHref);

                if ((sameServer && subpath)
                        && (linkHref.endsWith(linkText) || linkHref.endsWith(linkText + '/'))
                        && !EXCLUDES.contains(linkText)) {
                    al.add(linkText);
                }
            }

            result = new ListingResult(resource, al.toArray(new String[al.size()]));
        }
    } catch (final TransferException e) {
        this.error = e;
    } catch (final IOException e) {
        this.error = new TransferException("Failed to construct directory listing for: {}. Reason: {}", e, url,
                e.getMessage());
    } finally {
        closeQuietly(in);
        cleanup();
        if (oldName != null) {
            Thread.currentThread().setName(oldName);
        }
    }

    // A recorded error always wins over a (possibly partial) result.
    return error == null ? result : null;
}

From source file:gov.medicaid.screening.dao.impl.PharmacyLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * @param firstOrBusinessName First or Business name.
 * @param lastName Last name./*from   w  w w . ja  v a 2 s.c o  m*/
 * @param licenseNumber License number.
 * @return the search result for licenses
 * @throws URISyntaxException When an error occurs while building the URL.
 * @throws ClientProtocolException When client does not support protocol used.
 * @throws IOException When an error occurs while parsing response.
 * @throws ParseException When an error occurs while parsing response.
 */
private SearchResult<License> getAllResults(String firstOrBusinessName, String lastName, String licenseNumber)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException {
    DefaultHttpClient client = new DefaultHttpClient();
    client.setRedirectStrategy(new LaxRedirectStrategy());

    String path = "/mnbop/GLSuiteWeb/Clients/MNBOPharm/Public/";
    URIBuilder builder = new URIBuilder(getSearchURL()).setPath(path + "LicenseeSearch.aspx");

    HttpGet httpget = new HttpGet(builder.build());
    HttpEntity entity = client.execute(httpget).getEntity();
    Document page = Jsoup.parse(EntityUtils.toString(entity));

    HttpPost httppost = new HttpPost(builder.build());
    List<NameValuePair> parameters = new ArrayList<NameValuePair>();
    parameters.add(new BasicNameValuePair("__VIEWSTATE", page.select("#__VIEWSTATE").first().val()));
    parameters.add(
            new BasicNameValuePair("__VIEWSTATEENCRYPTED", page.select("#__VIEWSTATEENCRYPTED").first().val()));
    parameters
            .add(new BasicNameValuePair("__EVENTVALIDATION", page.select("#__EVENTVALIDATION").first().val()));
    parameters.add(new BasicNameValuePair("ObjectID", page.select("#ObjectID").first().val()));
    parameters.add(new BasicNameValuePair("ObjectTypeID", page.select("#ObjectTypeID").first().val()));
    parameters.add(new BasicNameValuePair("waFirstName", Util.defaultString(firstOrBusinessName)));
    parameters.add(new BasicNameValuePair("waLastName", Util.defaultString(lastName)));
    parameters.add(new BasicNameValuePair("waLicenseNumber", Util.defaultString(licenseNumber)));

    httppost.setEntity(new UrlEncodedFormEntity(parameters, Charset.forName("UTF-8")));
    HttpResponse postResponse = client.execute(httppost);

    entity = postResponse.getEntity();
    // licenses list
    List<License> licenseList = new ArrayList<License>();
    if (entity != null) {
        page = Jsoup.parse(EntityUtils.toString(entity));
        Elements trs = page.select("table#DataTable a");
        if (trs != null) {
            for (Element element : trs) {
                String href = element.attr("href");
                HttpGet detailsGet = new HttpGet(getSearchURL() + path + href);
                HttpResponse detailsResponse = client.execute(detailsGet);
                HttpEntity detailsEntity = detailsResponse.getEntity();
                if (detailsEntity != null) {
                    Document details = Jsoup.parse(EntityUtils.toString(detailsEntity));
                    licenseList.add(parseLicense(details));
                }
            }
        }
    }
    SearchResult<License> result = new SearchResult<License>();
    result.setItems(licenseList);
    return result;
}

From source file:org.keycloak.testsuite.util.saml.LoginBuilder.java

/**
 * Prepares a GET/POST request for logging the given user into the given login page. The login page is expected
 * to have at least input fields with id "username" and "password".
 *
 * @param user/*from ww  w .  ja  v a2s  . c o m*/
 * @param loginPage
 * @return
 */
private HttpUriRequest handleLoginPage(String loginPage, URI currentURI) {
    if (idpAlias != null) {
        org.jsoup.nodes.Document theLoginPage = Jsoup.parse(loginPage);
        Element zocialLink = theLoginPage.getElementById("zocial-" + this.idpAlias);
        assertThat("Unknown idp: " + this.idpAlias, zocialLink, Matchers.notNullValue());
        final String link = zocialLink.attr("href");
        assertThat("Invalid idp link: " + this.idpAlias, link, Matchers.notNullValue());
        return new HttpGet(currentURI.resolve(link));
    }

    return handleLoginPage(user, loginPage);
}

From source file:eu.masconsult.bgbanking.banks.sgexpress.SGExpressClient.java

/**
 * Converts one HTML table row into a {@code RawBankAccount}.
 *
 * <p>Only rows whose {@code class} attribute is "bg0" (case-insensitive) in a table of
 * type "Current Accounts" are converted. Every other row, including "detail" rows and
 * rows of "Cards" or unknown tables, yields {@code null}.
 *
 * @param type the caption of the table the row belongs to
 * @param row the table row element
 * @return the account read from the row, or {@code null} if the row is skipped
 */
private RawBankAccount obtainBankAccountFromHtmlTableRow(String type, Element row) {
    // "detail" rows and any other non-"bg0" rows carry no account data.
    if (!"bg0".equalsIgnoreCase(row.attr("class"))) {
        return null;
    }

    Log.v(TAG, "working row(" + type + "): " + row.html());

    // Cards are skipped for now; unknown table types are ignored.
    if (!"Current Accounts".equalsIgnoreCase(type)) {
        return null;
    }

    // The third cell is used both as the server id and as the IBAN.
    return new RawBankAccount()
            .setServerId(row.child(2).text())
            .setName(row.child(0).child(0).text())
            .setIBAN(row.child(2).text())
            .setCurrency(row.child(1).text())
            .setBalance(Convert.strToFloat(row.child(3).text()))
            .setAvailableBalance(Convert.strToFloat(row.child(4).text()));
}

From source file:nl.phanos.liteliveresultsclient.LoginHandler.java

/**
 * Collects the form parameters needed to submit the login form contained in the page.
 *
 * <p>Every {@code input} element below the element with id "primarycontent" contributes
 * one name/value pair. The values of the inputs named "email" and "password" are
 * replaced by the supplied credentials; all other inputs keep their {@code value}
 * attribute.
 *
 * @param html the HTML of the login page
 * @param username value to send for the "email" input
 * @param password value to send for the "password" input
 * @return the parameters in document order
 * @throws UnsupportedEncodingException declared for callers; not thrown by this body
 */
public List<NameValuePair> getFormParams(String html, String username, String password)
        throws UnsupportedEncodingException {

    // Container element that holds the login form's inputs.
    final Element formContainer = Jsoup.parse(html).getElementById("primarycontent");

    final List<NameValuePair> params = new ArrayList<NameValuePair>();
    for (Element input : formContainer.getElementsByTag("input")) {
        final String name = input.attr("name");

        final String value;
        if ("email".equals(name)) {
            value = username;
        } else if ("password".equals(name)) {
            value = password;
        } else {
            value = input.attr("value");
        }

        params.add(new BasicNameValuePair(name, value));
    }
    return params;
}

From source file:me.vertretungsplan.parser.UntisSubstitutionParser.java

/**
 * Loads the substitution schedule by visiting every configured overview page and each
 * class page linked from it.
 *
 * <p>HTTP errors on individual pages are tolerated as long as at least one page on the
 * same level could be loaded: if every class page of an overview fails, the last error
 * counts as a failure of that overview, and if every overview fails, the last error is
 * rethrown to the caller.
 *
 * @return the schedule filled with all substitutions that could be parsed
 * @throws IOException on I/O failure, including the last HTTP error when nothing loaded
 * @throws JSONException if the parser configuration cannot be read
 * @throws CredentialInvalidException declared for the login step
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);

    final String encoding = data.optString(PARAM_ENCODING, null);
    final SubstitutionSchedule schedule = SubstitutionSchedule.fromData(scheduleData);

    int loadedOverviews = 0;
    HttpResponseException overviewFailure = null;
    for (String overviewUrl : ParserUtils.handleUrlsWithDateFormat(urls)) {
        try {
            final Document overview = Jsoup.parse(this.httpGet(overviewUrl, encoding));
            final Elements classLinks = overview.select("td a");
            final String lastChange = overview.select("td[align=right]:not(:has(b))").text();

            int loadedClasses = 0;
            HttpResponseException classFailure = null;
            for (Element classLink : classLinks) {
                try {
                    // Class pages are addressed relative to the overview page's directory.
                    final String classUrl = overviewUrl.substring(0, overviewUrl.lastIndexOf("/")) + "/"
                            + classLink.attr("href");
                    final Document classPage = Jsoup.parse(httpGet(classUrl, encoding));

                    parseSubstitutionTable(schedule, lastChange, classPage);
                    loadedClasses++;
                } catch (HttpResponseException e) {
                    classFailure = e;
                }
            }
            // Not a single class page worked: treat the overview itself as failed.
            if (loadedClasses == 0 && classFailure != null) {
                throw classFailure;
            }
            loadedOverviews++;
        } catch (HttpResponseException e) {
            overviewFailure = e;
        }
    }
    if (loadedOverviews == 0 && overviewFailure != null) {
        throw overviewFailure;
    }

    // Fall back to the first configured URL when no website is configured explicitly.
    schedule.setWebsite(data.has(PARAM_WEBSITE) ? data.getString(PARAM_WEBSITE) : urls.get(0));
    schedule.setClasses(getAllClasses());
    schedule.setTeachers(getAllTeachers());
    return schedule;
}

From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java

/**
 * Composes the final response by filling every {@code div[data-loc]} element of the
 * downstream HTML with content fetched from the location it names.
 *
 * <p>Per element, the following {@code data-*} attributes are read: {@code loc} (URL to
 * fetch), {@code fragment-name} (optional; only {@code div}s of the fetched document
 * with a matching {@code data-fragment-name} are used), {@code cache-name},
 * {@code disable-caching} and {@code fail-quietly}. When exactly one fragment matches,
 * root-relative image URLs inside it are rewritten to the {@code /cui-req://} form.
 * Failures are rendered into the element as {@code cui-error} markup (an empty
 * {@code div} when failing quietly).
 *
 * <p>If the document has no replaceable elements, or composing yields nothing, the
 * original body is written unchanged.
 *
 * @return always {@code null}
 */
@Override
public Object run() {

    String origBody = contentManager.getDownstreamResponse();
    if (origBody == null || origBody.isEmpty()) {
        return null;
    }

    String composedBody = null;
    log.trace("Response from downstream server: " + origBody);

    Document doc = Jsoup.parse(origBody);
    if (hasReplaceableElements(doc)) {
        log.debug("We have replaceable elements. Let's get em!");
        Elements elementsToUpdate = doc.select("div[data-loc]");
        for (Element e : elementsToUpdate) {
            StringBuilder content = new StringBuilder();
            String location = e.dataset().get("loc");
            String fragmentName = e.dataset().get("fragment-name");
            String cacheName = e.dataset().get("cache-name");
            boolean useCaching = !Boolean.valueOf(e.dataset().get("disable-caching"));
            boolean failQuietly = Boolean.valueOf(e.dataset().get("fail-quietly"));
            URL url = null;
            try {
                url = new URL(location);
                String protocol = url.getProtocol();
                String service = url.getHost();

                log.debug("Fetching content at location [ " + location + " ] with cacheName = [ " + cacheName
                        + " ]");

                try {
                    RequestContext context = RequestContext.getCurrentContext();
                    ContentResponse response = contentManager.getContentFromService(location, cacheName,
                            useCaching, context);

                    log.trace(response.toString());

                    if (!response.isError()) {
                        Object resp = response.getContent();
                        if (String.class.isAssignableFrom(resp.getClass())) {
                            String subContentResponse = (String) resp;
                            //TODO You better trust the source of your downstream HTML!
                            // (Jsoup.clean with Whitelist.basic() was tried and stripped the html out entirely.)
                            Document subDocument = Jsoup.parse(subContentResponse);

                            if (fragmentName != null) {
                                Elements fragments = subDocument
                                        .select("div[data-fragment-name=\"" + fragmentName + "\"]");

                                if (fragments != null && fragments.size() > 0) {
                                    if (fragments.size() == 1) {
                                        Element frag = fragments.first();

                                        //need to see if there are images that we need to replace the urls on
                                        Elements images = frag.select("img");
                                        for (Element i : images) {
                                            String src = i.attr("src");
                                            // Root-relative only; protocol-relative ("//host/...") is left alone.
                                            if (src.startsWith("/") && !src.startsWith("//")) {
                                                i.attr("src", "/cui-req://" + protocol + "://" + service + src);
                                            } //else what do we do about relative urls?
                                        }

                                        content.append(frag.toString());

                                    } else {
                                        for (Element frag : fragments) {
                                            content.append(frag.toString()).append("\n\n");
                                        }
                                    }
                                } else {
                                    log.debug("Found no matching fragments for [ " + fragmentName + " ]");
                                    if (failQuietly) {
                                        content.append("<div class='cui-error'></div>");
                                    } else {
                                        content.append(
                                                "<span class='cui-error'>Failed getting content from remote service. Possible reason in reponse below</span>");
                                        content.append(subDocument.toString());
                                    }
                                }
                            } else {
                                //take the whole thing and cram it in there!
                                content.append(subDocument.toString());
                            }
                        } else {
                            //not text...
                            if (!failQuietly) {
                                content.append(
                                        "<span class='cui-error'>Failed getting content from remote service. Reason: content was not text</span>");
                            } else {
                                content.append("<div class='cui-error'></div>");
                            }
                        }

                    } else {
                        // NOTE(review): the remote message is inserted into the page unescaped.
                        if (!failQuietly) {
                            content.append(
                                    "<span class='cui-error'>Failed getting content from remote service. Reason: "
                                            + response.getMessage() + "</span>");
                        } else {
                            content.append("<div class='cui-error'></div>");
                        }
                    }

                    //now append it to the page
                    if (!content.toString().isEmpty()) {
                        e.html(content.toString());
                    }
                } catch (Throwable t) {
                    // NOTE(review): the exception message is inserted into the page unescaped.
                    if (!failQuietly) {
                        e.html("<span class='cui-error'>Failed getting content from remote service. Reason: "
                                + t.getMessage() + "</span>");
                    }
                    log.warn("Failed replacing content", t);
                }
            } catch (MalformedURLException ex) {
                log.warn("location was invalid: [ " + location + " ]", ex);
                if (!failQuietly) {
                    content.append(
                            "<span class='cui-error'>Failed getting content from remote service. Reason: data-loc was an invalid location.</span>");
                } else {
                    content.append("<div class='cui-error'></div>");
                }
                // The element is only updated inside the inner try block, which is never
                // reached for an invalid location, so render the error markup here.
                e.html(content.toString());
            }

        }

        composedBody = doc.toString();
    } else {
        log.debug("Document has no replaeable elements. Skipping");
    }

    try {
        addResponseHeaders();
        if (composedBody != null && !composedBody.isEmpty()) {
            writeResponse(composedBody, getMimeType(RequestContext.getCurrentContext()));
        } else {
            writeResponse(origBody, getMimeType(RequestContext.getCurrentContext()));
        }
    } catch (Exception ex) {
        log.error("Error sending response", ex);

    }
    return null;
}

From source file:me.vertretungsplan.parser.DSBMobileParser.java

/**
 * Loads one schedule page, parses it with the parser matching its format and follows
 * a {@code <meta http-equiv="refresh">} redirect to the next page, if there is one.
 *
 * <p>The format is detected from the page content or forced through the
 * {@code PARAM_TYPE} setting: Untis, DaVinci or Indiware. A page that only states that
 * no content was provided for this area is silently ignored. Every visited URL is
 * recorded in {@code usedUrls} so that redirect loops end.
 *
 * @param v the schedule to add the parsed days to
 * @param url the URL of the page to load
 * @param usedUrls URLs that have already been loaded; {@code url} is added to it
 * @throws IOException if the page cannot be loaded or its structure is inconsistent
 * @throws JSONException if the parser configuration cannot be read
 * @throws CredentialInvalidException declared for the page download and the parsers
 * @throws IncompatibleScheduleException if the page matches none of the known formats
 */
private void loadScheduleFromUrl(SubstitutionSchedule v, String url, List<String> usedUrls)
        throws IOException, JSONException, CredentialInvalidException, IncompatibleScheduleException {
    usedUrls.add(url);
    String html = httpGet(url, data.has(PARAM_ENCODING) ? data.optString(PARAM_ENCODING, null) : "UTF-8");
    Document doc = Jsoup.parse(html);

    if (doc.title().toLowerCase().contains("untis") || doc.html().toLowerCase().contains("untis")
            || data.optString(PARAM_TYPE, "").equals("untis")) {
        parseMultipleMonitorDays(v, doc, data);
    } else if (doc.html().toLowerCase().contains("created by davinci")
            || data.optString(PARAM_TYPE, "").equals("davinci")) {
        Elements titles = doc.select("h2");
        Elements tables = doc.select("h2 + p + table");
        if (titles.size() != tables.size())
            // Unicode escape keeps the umlaut intact regardless of the source file encoding.
            throw new IOException("Anzahl \u00dcberschriften != Anzahl Tabellen");
        for (int i = 0; i < titles.size(); i++) {
            SubstitutionScheduleDay day = new SubstitutionScheduleDay();
            String date = titles.get(i).text();
            day.setDateString(date);
            day.setDate(ParserUtils.parseDate(date));
            DaVinciParser.parseDaVinciTable(tables.get(i), v, day, colorProvider);
            v.addDay(day);
        }
    } else if (doc.select(".tdaktionen").size() > 0 || data.optString(PARAM_TYPE, "").equals("indiware")) {
        new IndiwareParser(scheduleData, cookieProvider).parseIndiwarePage(v, doc.html());
    } else if (doc.text().matches(".*F\u00fcr diesen Bereich.*wurde kein Inhalt bereitgestellt\\.")) {
        // "No content was provided for this area": nothing to parse and nothing to follow.
        return;
    } else {
        throw new IncompatibleScheduleException();
    }

    Element meta = doc.select("meta[http-equiv=refresh]").first();
    if (meta != null) {
        String content = meta.attr("content");

        // Locate "url=" case-insensitively, but take the target from the original
        // attribute value so that case-sensitive paths survive.
        int urlIndex = -1;
        for (int i = 0; i + 4 <= content.length(); i++) {
            if (content.regionMatches(true, i, "url=", 0, 4)) {
                urlIndex = i;
                break;
            }
        }

        // A refresh without a target URL only reloads the same page; nothing to follow.
        if (urlIndex >= 0) {
            String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1)
                    + content.substring(urlIndex + 4);
            if (!usedUrls.contains(redirectUrl)) {
                loadScheduleFromUrl(v, redirectUrl, usedUrls);
            }
        }
    }
}