Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes.Element#attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element#attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java

/**
 * Logs in, reads the list of weeks from the navigation bar document and parses the
 * substitution plan pages of every listed week into {@link VertretungsplanTag} entries.
 *
 * <p>Depending on the {@code single_classes} option one page per class (numbered from 1,
 * zero-padded to five digits) or a single {@code 00000} page per week is fetched. The
 * {@code w_after_number} option decides whether the week or the letter comes first in the URL.
 *
 * @return the plan object on which the collected list of days has been set
 * @throws IOException   declared by this method; presumably raised by the HTTP helpers - confirm
 * @throws JSONException declared by this method; presumably raised when reading the school's JSON data - confirm
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    // NOTE(review): the replaced literal may originally have been "&nbsp;" or a non-breaking space - confirm
    Document navbarDoc = Jsoup.parse(getNavbarDoc().replace(" ", ""));
    // NOTE(review): first() returns null when no week selector exists; the loop below would then fail
    Element select = navbarDoc.select("select[name=week]").first();

    Vertretungsplan v = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();

    String info = navbarDoc.select(".description").text();
    String stand;
    try {
        // keep everything from "Stand:" onwards; substring throws when the marker is missing (index -1)
        stand = info.substring(info.indexOf("Stand:"));
    } catch (Exception e) {
        stand = "";
    }

    // one <option> per available week
    for (Element option : select.children()) {
        String week = option.attr("value");
        String letter = data.optString("letter", "w");
        if (data.optBoolean("single_classes", false)) {
            // one page per class; the loop variable itself is unused, only the running number matters
            int classNumber = 1;
            for (String klasse : getAllClasses()) {
                String paddedNumber = String.format("%05d", classNumber);
                String url;
                if (data.optBoolean("w_after_number", false))
                    url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm";
                else
                    url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm";

                Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
                // every bold heading inside #vertretung is treated as one day
                Elements days = doc.select("#vertretung > p > b, #vertretung > b");
                for (Element day : days) {
                    VertretungsplanTag tag = getTagByDatum(tage, day.text());
                    tag.setStand(stand);
                    tag.setDatum(day.text());
                    Element next = null;
                    if (day.parent().tagName().equals("p")) {
                        // heading wrapped in <p>: skip two element siblings after that paragraph
                        next = day.parent().nextElementSibling().nextElementSibling();
                    } else
                        // heading directly in the container: take the sibling after its first <p>
                        next = day.parent().select("p").first().nextElementSibling();
                    if (next.className().equals("subst")) {
                        // substitution table
                        if (next.text().contains("Vertretungen sind nicht freigegeben"))
                            continue; // skips writeTagByDatum for this day as well
                        parseVertretungsplanTable(next, data, tag);
                    } else {
                        // messages, followed two element siblings later by the substitution table
                        parseNachrichten(next, data, tag);
                        next = next.nextElementSibling().nextElementSibling();
                        parseVertretungsplanTable(next, data, tag);
                    }
                    writeTagByDatum(tage, tag);
                }

                classNumber++;
            }
        } else {
            // a single page (number 00000) per week
            String url;
            if (data.optBoolean("w_after_number", false))
                url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm";
            else
                url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm";
            Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
            Elements days = doc.select("#vertretung > p > b, #vertretung > b");
            for (Element day : days) {
                VertretungsplanTag tag = getTagByDatum(tage, day.text());
                tag.setStand(stand);
                tag.setDatum(day.text());
                Element next = null;
                if (day.parent().tagName().equals("p")) {
                    // heading wrapped in <p>: skip two element siblings after that paragraph
                    next = day.parent().nextElementSibling().nextElementSibling();
                } else
                    // heading directly in the container: take the sibling after its first <p>
                    next = day.parent().select("p").first().nextElementSibling();
                if (next.className().equals("subst")) {
                    // substitution table
                    if (next.text().contains("Vertretungen sind nicht freigegeben"))
                        continue; // the day is not added to the list
                    parseVertretungsplanTable(next, data, tag);
                } else {
                    // messages, followed two element siblings later by the substitution table
                    parseNachrichten(next, data, tag);
                    next = next.nextElementSibling().nextElementSibling();
                    parseVertretungsplanTable(next, data, tag);
                }
                // unlike the per-class branch, the tag is appended directly here
                tage.add(tag);
            }
        }
        // set once per week option, always with the same list instance
        v.setTage(tage);
    }
    return v;
}

From source file:me.vertretungsplan.parser.UntisMonitorParser.java

/**
 * Loads one page and hands it to {@code findSubDocs} when it contains a {@code .mon_title}
 * element; otherwise looks for the schedule inside frames.
 *
 * <p>The method recurses into matching frames and, when {@code following} is set, into the
 * target of a {@code <meta http-equiv="refresh">} redirect. Both recursions stop once
 * {@code MAX_RECURSION_DEPTH} is reached.
 *
 * @param url            page to load; {@code VALUE_URL_LOGIN_RESPONSE} selects the stored login response
 * @param encoding       encoding passed on to {@code httpGet}
 * @param following      whether meta refresh redirects are followed
 * @param docs           list handed to {@code findSubDocs}; its emptiness decides whether failures are fatal
 * @param startUrl       a redirect pointing back to this URL is not followed
 * @param recursionDepth current nesting level of this call
 * @throws IOException                when the first page cannot be loaded or contains no schedule
 * @throws CredentialInvalidException when the page text contains "registriert"
 */
private void loadUrl(String url, String encoding, boolean following, List<Document> docs, String startUrl,
        int recursionDepth) throws IOException, CredentialInvalidException {
    String html;
    if (url.equals(VALUE_URL_LOGIN_RESPONSE)) {
        html = loginResponse;
    } else {
        try {
            html = httpGet(url, encoding).replace("&nbsp;", "");
        } catch (HttpResponseException e) {
            if (docs.isEmpty()) {
                throw e;
            }
            return; // ignore if first page was loaded and redirect didn't work
        }
    }
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(url);

    if (doc.select(".mon_title").size() == 0) {
        // We have a problem - there seems to be no substitution schedule. Maybe it is hiding
        // inside a frame?
        // The attribute selector must be closed: "frame[name" is not a well-formed query.
        if (doc.select("frameset frame[name]").size() > 0) {
            for (Element frame : doc.select("frameset frame")) {
                if (frame.attr("src").matches(".*subst_\\d\\d\\d.html?")
                        && recursionDepth < MAX_RECURSION_DEPTH) {
                    String frameUrl = frame.absUrl("src");
                    // the frame becomes the start URL of its own redirect chain
                    loadUrl(frameUrl, encoding, following, docs, frameUrl, recursionDepth + 1);
                }
            }
        } else if (doc.text().contains("registriert")) {
            throw new CredentialInvalidException();
        } else if (docs.isEmpty()) {
            // Only fatal when nothing has been loaded yet; later pages without a schedule are ignored.
            throw new IOException("Could not find .mon-title, seems like there is no Untis schedule here");
        }
    } else {
        findSubDocs(docs, html, doc);

        Element meta = following ? doc.select("meta[http-equiv=refresh]").first() : null;
        if (meta != null) {
            String content = meta.attr("content").toLowerCase();
            int urlPos = content.indexOf("url=");
            // A refresh without a target (e.g. content="30") only reloads the same page;
            // there is nothing to follow and substring() must not be fed a bogus index.
            if (urlPos >= 0) {
                String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1)
                        + content.substring(urlPos + 4);
                if (!redirectUrl.equals(startUrl) && recursionDepth < MAX_RECURSION_DEPTH) {
                    loadUrl(redirectUrl, encoding, true, docs, startUrl, recursionDepth + 1);
                }
            }
        }
    }
}

From source file:me.vertretungsplan.parser.DSBLightParser.java

/**
 * Fetches a single page and, if it looks like Untis output, parses it into the schedule.
 * A {@code <meta http-equiv="refresh">} redirect on that page is followed recursively until
 * it points back to {@code startUrl}.
 *
 * @param url      page to fetch
 * @param referer  passed through to {@code httpGet}
 * @param schedule schedule that receives the parsed days
 * @param startUrl URL at which the redirect chain is considered closed
 */
private void parseDay(String url, Map<String, String> referer, SubstitutionSchedule schedule, String startUrl)
        throws IOException, JSONException, CredentialInvalidException {
    final Document doc = Jsoup.parse(httpGet(url, data.optString(PARAM_ENCODING, null), referer));

    // The page counts as Untis output if title or markup mention it, or a .mon_list is present.
    final boolean looksLikeUntis = doc.title().toLowerCase().contains("untis")
            || doc.html().toLowerCase().contains("untis")
            || doc.select(".mon_list").size() > 0;
    if (!looksLikeUntis) {
        return;
    }

    parseMultipleMonitorDays(schedule, doc, data);

    final Element refresh = doc.select("meta[http-equiv=refresh]").first();
    if (refresh == null) {
        return;
    }

    // The redirect target is resolved against the directory of the current URL.
    final String content = refresh.attr("content").toLowerCase();
    final String directory = url.substring(0, url.lastIndexOf("/") + 1);
    final String redirectUrl = directory + content.substring(content.indexOf("url=") + 4);
    if (!redirectUrl.equals(startUrl)) {
        parseDay(redirectUrl, referer, schedule, startUrl);
    }
}

From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java

/**
 * Gets the image captions in a map keyed by itemID_attachmentID
 * The key is saved also in the <img> tag's "alt" attribute for later use from word
 * @param doc/*  ww  w .  j  a v a2 s.  co m*/
 * @param personID
 * @param imageCaptionsMap
 * @return
 */
private String getImageCaptions(Document doc, Integer personID,
        Map<String, ImageOrTableCaption> imageCaptionsMap) {
    Elements imgElements = doc.select("img");
    if (imgElements != null) {
        for (Iterator<Element> iterator = imgElements.iterator(); iterator.hasNext();) {
            Element imageElement = iterator.next();
            String sourceAttribute = imageElement.attr("src");
            String style = imageElement.attr("style");
            //remove the width and height attributes from html img to avoid java.lang.OutOfMemoryError: Java heap space
            imageElement.removeAttr("width");
            imageElement.removeAttr("height");
            ALIGN align = null;
            if (style != null) {
                if (style.contains("float:left")) {
                    align = ALIGN.LEFT;
                } else {
                    if (style.contains("float:right")) {
                        align = ALIGN.RIGHT;
                    }
                }
            }
            String altAttribute = imageElement.attr("alt");
            Map<String, String> map = getTemporaryFilePathMap(sourceAttribute, personID);
            if (map != null) {
                imageElement.attr("src", map.get("temporaryFilePath"));
                //save imageCaption into the map and now use the "alt" attribute for storing the merged key
                //which will be transformed  in nonvisualdrawingprops.getDescr() by XHTMLImporterImpl to set the caption on the ms word side
                String imageCaption = null;
                if (altAttribute != null && !"".equals(altAttribute)) {
                    //probably from previously removed figcaption but it may also be explicitly set
                    imageCaption = altAttribute;
                } else {
                    imageCaption = map.get("description");
                }
                globalCounter++;
                counterWithinChapter++;
                imageElement.attr("alt", String.valueOf(globalCounter));
                if (imageCaption == null) {
                    //add anyway to the map even as empty string because this marks the image to be added to the List of figures 
                    imageCaption = "";
                }
                imageCaptionsMap.put(String.valueOf(globalCounter),
                        new ImageOrTableCaption(chapterNo, counterWithinChapter, imageCaption, align));
            }
        }
    }
    return doc.body().html();
}

From source file:com.shalzz.attendance.activity.LoginActivity.java

/**
 * Creates the listener that extracts hidden form fields from a response.
 *
 * <p>Every {@code input[type=hidden]} with a non-empty name and a non-empty value is stored
 * in {@code data}.
 *
 * @return a listener that parses the response body as HTML
 */
private Response.Listener<String> getHiddenDataSuccessListener() {
    return new Response.Listener<String>() {
        @Override
        public void onResponse(String response) {
            // Resolved inside the anonymous class so the log tag stays that class' name.
            final String tag = getClass().getName();

            Log.i(tag, "Collected hidden data.");
            Document doc = Jsoup.parse(response);
            Log.i(tag, "Parsing hidden data...");

            for (Element input : doc.select("input[type=hidden]")) {
                String name = input.attr("name");
                String val = input.attr("value");
                if (name.isEmpty() || val.isEmpty()) {
                    continue;
                }
                data.put(name, val);
            }
            Log.i(tag, "Parsed hidden data.");
        }
    };
}

From source file:me.vertretungsplan.parser.DSBLightParser.java

/**
 * Parses one page: every iframe on it is parsed as a day, then the "next" link and the
 * {@code Timer1} postback are followed recursively.
 *
 * <p>Recursion ends when a page has no iframe or its first iframe source equals
 * {@code firstUrl}, i.e. the sequence of pages has wrapped around.
 *
 * @param url      URL the HTML was loaded from; used as base URI and as postback target
 * @param html     HTML of the page
 * @param schedule schedule that receives the parsed days
 * @param referer  passed through to the HTTP helpers
 * @param firstUrl iframe source of the first page seen, or {@code null} on the initial call
 */
private void parseProgram(String url, String html, SubstitutionSchedule schedule, Map<String, String> referer,
        String firstUrl) throws IOException, JSONException, CredentialInvalidException {
    Document doc = Jsoup.parse(html, url);
    if (doc.select("iframe").attr("src").equals(firstUrl) || doc.select("iframe").size() == 0) {
        return;
    }
    for (Element iframe : doc.select("iframe")) {
        // Data
        parseDay(iframe.attr("src"), referer, schedule, iframe.attr("src"));
    }
    if (firstUrl == null) {
        firstUrl = doc.select("iframe").attr("src");
    }
    if (doc.select("#hlNext").size() > 0) {
        String nextUrl = doc.select("#hlNext").first().attr("abs:href");
        try {
            String response = httpGet(nextUrl, ENCODING, referer);
            // Signature is (url, html, ...): the URL goes first, the fetched markup second.
            parseProgram(nextUrl, response, schedule, referer, firstUrl);
        } catch (HttpResponseException ignored) {
            // NOTE(review): presumably an unreachable next page just ends the pagination - confirm
        }
    }
    if (html.contains("Timer1")) {
        // The postback needs the page's form fields; without a form there is nothing to submit.
        Element form = doc.select("form").first();
        if (form instanceof FormElement) {
            List<NameValuePair> formParams = new ArrayList<>();
            for (Connection.KeyVal kv : ((FormElement) form).formData()) {
                formParams.add(new BasicNameValuePair(kv.key(), kv.value()));
            }
            formParams.add(new BasicNameValuePair("__EVENTTARGET", "Timer1"));
            formParams.add(new BasicNameValuePair("__EVENTARGUMENT", ""));
            String response = httpPost(url, ENCODING, formParams, referer);
            parseProgram(url, response, schedule, referer, firstUrl);
        }
    }
}

From source file:mg.jerytodik.business.service.impl.JeryTodikSourceServiceImpl.java

/**
 * {@inheritDoc}
 *
 * <p>Downloads the main page of the source and every linked resource whose absolute URL
 * contains "css", rewrites the links in the main page to point at the archived copies and
 * writes everything below a freshly named sub folder. The outcome is recorded through
 * {@code addHistory}.
 */
@Override
public void archiveResource(final JeryTodikSource jerytodikSource) throws JerytodikException {

    validateSource(jerytodikSource);

    try {

        String principalResourceContent = getPrincipalResourceContent(jerytodikSource);
        String rootResourceFolderName = createRootResourceFolderName(jerytodikSource.getUrl());
        final Elements cssLinks = getCssLinks(jerytodikSource.getUrl());

        rootResourceFolderName += File.separator + createSubFolderName();

        // Archive the css files that belong to the welcome page ...
        final String resourceFolderName = rootResourceFolderName + File.separator + archiveResourceFolderName;

        // Link prefix used inside the archived page. Kept local: appending the slash to the
        // shared archiveResourceFolderName would add another slash on every invocation.
        final String archiveLinkPrefix = archiveResourceFolderName + JerytodikUtil.SLASH_CHAR;

        LOGGER.info(JerytodikUtil.LINE);
        LOGGER.info("Archiving resources from {} ...", jerytodikSource.getUrl());
        LOGGER.info(JerytodikUtil.LINE);

        for (Element link : cssLinks) {

            final String resourceUrl = link.attr("abs:href");
            if (resourceUrl.toLowerCase().contains("css")) {

                // file name = everything after the last slash of the absolute URL
                final String resourceFileName = resourceUrl
                        .substring(resourceUrl.lastIndexOf(JerytodikUtil.SLASH_CHAR) + 1);

                // point the page at the archived copy instead of the original href
                principalResourceContent = principalResourceContent.replace(link.attr("href"),
                        archiveLinkPrefix + resourceFileName);

                JeryTodikSource cssSource = new JeryTodikSource();
                cssSource.setUrl(resourceUrl);
                cssSource.setName(resourceFileName);

                final String cssSourceContent = getPrincipalResourceContent(cssSource);
                LOGGER.info("\t-{}", resourceFileName);
                writeInFile(resourceFolderName, cssSourceContent, resourceFileName);
            }
        }

        // Archive the welcome page ...
        LOGGER.info("\t-{}", welcomeFileName);
        writeInFile(rootResourceFolderName, principalResourceContent, welcomeFileName);

        addHistory(HistoryUtil.ARCHIVE_OK, jerytodikSource);

    } catch (IOException e) {

        addHistory(HistoryUtil.ARCHIVE_KO, jerytodikSource);
        // NOTE(review): only the message is forwarded, the cause is lost - pass e along if
        // JerytodikException offers a (String, Throwable) constructor
        throw new JerytodikException(e.getMessage());
    }

}

From source file:br.ufsc.das.gtscted.shibbauth.Connection.java

/**
 * Runs the three POST requests of the login sequence and returns the body of the last response.
 *
 * <ol>
 * <li>POST the chosen IdP to the WAYF.</li>
 * <li>POST user name and password to the action path of the first form on the returned page.</li>
 * <li>POST the values of the first two {@code input} elements of the authentication response
 * (sent as {@code RelayState} and {@code SAMLResponse}) to the action of its first form.</li>
 * </ol>
 *
 * @param wayfLocation   base location of the WAYF
 * @param wayfActionPath path appended to {@code wayfLocation} for the first POST
 * @param idpUrl         URL of the chosen IdP; its "/idp/shibboleth" part is replaced by the form action
 * @param username       sent as {@code j_username}
 * @param password       sent as {@code j_password}
 * @return body of the response to the third POST
 */
public String authenticate(String wayfLocation, String wayfActionPath, String idpUrl, String username,
        String password) throws ClientProtocolException, IOException {

    // Step 1: POST to the WAYF, passing the chosen IdP.
    HttpPost wayfPost = new HttpPost(wayfLocation + wayfActionPath);
    List<NameValuePair> wayfParams = new ArrayList<NameValuePair>();
    wayfParams.add(new BasicNameValuePair("user_idp", idpUrl));
    wayfPost.setEntity(new UrlEncodedFormEntity(wayfParams, HTTP.UTF_8));
    HttpResponse wayfResponse = httpClient.execute(wayfPost);
    String idpPage = readResponse(wayfResponse.getEntity().getContent()).toString();

    // Read the path given in the "action" field of the IdP page (/idp/Authn/UserPassword).
    Document idpDoc = Jsoup.parse(idpPage);
    String idpActionPath = idpDoc.select("form").get(0).attr("action");

    // Step 2: POST to the chosen IdP (for example
    // https://idpstcfed.sj.ifsc.edu.br/idp/Authn/UserPassword), passing the user
    // (j_username) and the password (j_password).
    HttpPost credentialsPost = new HttpPost(idpUrl.replace("/idp/shibboleth", idpActionPath));
    List<NameValuePair> credentials = new ArrayList<NameValuePair>();
    credentials.add(new BasicNameValuePair("j_username", username));
    credentials.add(new BasicNameValuePair("j_password", password));
    credentialsPost.setEntity(new UrlEncodedFormEntity(credentials, HTTP.UTF_8));
    HttpResponse credentialsResponse = httpClient.execute(credentialsPost);
    String authPage = readResponse(credentialsResponse.getEntity().getContent()).toString();

    // Collect the elements that are passed to the SP to create the security context.
    Document authDoc = Jsoup.parse(authPage);
    Element authForm = authDoc.select("form").get(0);
    Element relayStateInput = authDoc.select("input").get(0);
    Element samlResponseInput = authDoc.select("input").get(1);
    String consumerUrl = authForm.attr("action");
    String relayState = relayStateInput.attr("value");
    String samlResponse = samlResponseInput.attr("value");

    // Step 3: POST to the "assertion consumer" at the SP, named in the "action" field of the
    // response received after authentication. This POST carries two values: RelayState and
    // SAMLResponse.
    HttpPost consumerPost = new HttpPost(consumerUrl);
    List<NameValuePair> assertion = new ArrayList<NameValuePair>();
    assertion.add(new BasicNameValuePair("RelayState", relayState));
    assertion.add(new BasicNameValuePair("SAMLResponse", samlResponse));
    consumerPost.setEntity(new UrlEncodedFormEntity(assertion, HTTP.UTF_8));
    HttpResponse consumerResponse = httpClient.execute(consumerPost);
    return readResponse(consumerResponse.getEntity().getContent()).toString();
}

From source file:com.liato.bankdroid.banking.banks.PayPal.java

/**
 * Opens the PayPal start page, reads the post URL from the form named {@code login_form}
 * and assembles the form fields for the login request.
 *
 * @return the login package holding the HTTP client, the post data, the fetched page and the post URL
 * @throws BankException when the login form or its action attribute cannot be found
 */
@Override
protected LoginPackage preLogin() throws BankException, ClientProtocolException, IOException {
    urlopen = new Urllib(context, CertificateReader.getCertificates(context, R.raw.cert_paypal));
    urlopen.setUserAgent(
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36");

    // Fetch the start page to obtain cookies and the url to post to
    response = urlopen.open("https://www.paypal.com/en");
    Document page = Jsoup.parse(response);
    Element loginForm = page.select("form[name=login_form]").first();
    if (loginForm == null || TextUtils.isEmpty(loginForm.attr("action"))) {
        throw new BankException(res.getText(R.string.unable_to_find).toString() + " post url.");
    }
    String strPostUrl = loginForm.attr("action").trim();

    // Credentials first, then the fixed browser fingerprint fields.
    List<NameValuePair> fields = new ArrayList<NameValuePair>();
    fields.add(new BasicNameValuePair("login_email", username));
    fields.add(new BasicNameValuePair("login_password", password));
    fields.add(new BasicNameValuePair("target_page", "0"));
    fields.add(new BasicNameValuePair("submit.x", "Log In"));
    fields.add(new BasicNameValuePair("form_charset", "UTF-8"));
    fields.add(new BasicNameValuePair("browser_name", "undefined"));
    fields.add(new BasicNameValuePair("browser_version", "undefined"));
    fields.add(new BasicNameValuePair("operating_system", "Windows"));
    fields.add(new BasicNameValuePair("bp_mid",
            "v=1;a1=na~a2=na~a3=na~a4=Mozilla~a5=Netscape~a6=5.0 (Windows; en-US)~a7=20100713~a8=na~a9=true~a10=Windows NT 6.1~a11=true~a12=Win32~a13=na~a14=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.7) Gecko/20100713 Firefox/3.6.7 ( .NET CLR 3.5.30729; .NET4.0C)~a15=true~a16=en-US~a17=na~a18=www.paypal.com~a19=na~a20=na~a21=na~a22=na~a23=1280~a24=720~a25=24~a26=658~a27=na~a28=Sun Oct 31 2010 18:41:07 GMT 0100~a29=1~a30=def|qt1|qt2|qt3|qt4|qt5|qt6|swf|~a31=yes~a32=na~a33=na~a34=no~a35=no~a36=yes~a37=no~a38=online~a39=no~a40=Windows NT 6.1~a41=no~a42=no~"));
    fields.add(new BasicNameValuePair("bp_ks1",
            "v=1;l=16;Di0:2663Di1:48Ui0:15Ui1:81Di2:176Di3:48Ui2:32Ui3:96Di4:384Ui4:48Di5:352Ui5:48Di6:128Ui6:80Di7:112Ui7:48Di8:113Ui8:79Di9:125Ui9:51Di10:98Ui10:72Di11:227Ui11:51Di12:80Ui12:80Di13:128Ui13:64Di14:48Ui14:80Di15:416Ui15:80"));
    fields.add(new BasicNameValuePair("bp_ks2", ""));
    fields.add(new BasicNameValuePair("bp_ks3", ""));
    fields.add(new BasicNameValuePair("flow_name", "xpt/Marketing_CommandDriven/homepage/IndividualsHome"));
    fields.add(new BasicNameValuePair("fso",
            "k2TDENTlxEJnhbuYDYFmKMyVq0kUZPsdK6j3V1gPUwuZvyAmzzpRs4Cmjet0z19AwlxXfW"));
    return new LoginPackage(urlopen, fields, response, strPostUrl);
}

From source file:com.mycompany.searchengineaggregator.SearchEngineAggregator.java

/**
 * Queries one search engine and returns the result links found on the first result page.
 *
 * <p>Google results are the links starting with {@code /url?q=} (prefix stripped); Yahoo and
 * Bing results are all links starting with {@code http}. Links whose text is "Cached" are
 * skipped. Results are numbered from 1 in document order.
 *
 * @param query        the search terms; appended to the URL as given
 *                     (NOTE(review): not URL-encoded here - confirm callers encode it)
 * @param searchEngine the engine to query
 * @return the results, or {@code null} when the engine is not handled or the page could not be fetched
 * @throws JSONException declared by this method; presumably raised by
 *                       {@code toSearchResultJSONObject} - confirm
 */
public ArrayList<JSONObject> getSearchResults(String query, SearchEngine searchEngine) throws JSONException {

    ArrayList<JSONObject> result = new ArrayList<>();
    StringBuilder url = new StringBuilder();
    Document doc = null;
    String userAgent = null;

    //Handles specific search engine connection details
    switch (searchEngine) {
    case Google:
        url.append("https://www.google.com/search?q=");
        url.append(query);
        userAgent = GoogleUserAgent;
        break;
    case Yahoo:
        url.append("https://search.yahoo.com/search?q=");
        url.append(query);
        userAgent = YahooUserAgent;
        break;
    case Bing:
        url.append("http://www.bing.com/search?q=");
        url.append(query);
        userAgent = BingUserAgent;
        break;
    default:
        break;
    }

    if (userAgent == null)
        return null;

    try {
        //Creates a connection, and fetches and parses the HTML file
        doc = Jsoup.connect(url.toString()).data("query", "Java").userAgent(userAgent).cookie("auth", "token")
                .timeout(3000).get();

    } catch (IOException ex) {
        Logger.getLogger(SearchEngineAggregator.class.getName()).log(Level.SEVERE, null, ex);
    }

    if (doc == null)
        return null;

    //Get all links with attribute href
    Elements links = doc.select("a[href]");
    int i = 1;

    //For every link, check if url was outbound, strip non-url substring and append to result
    for (Element link : links) {

        String tempLink = link.attr("href");
        String tempText = link.text();

        //Handles specific search engine result filters
        switch (searchEngine) {
        case Google:
            if (tempLink.startsWith("/url?q=") && !tempText.equals("Cached")) {
                tempLink = tempLink.replace("/url?q=", "");
                result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink));
                i++;
            }
            break;
        // Yahoo and Bing share one filter. The cases are merged explicitly instead of relying
        // on Yahoo falling through into Bing whenever its condition is false.
        case Yahoo:
        case Bing:
            if (tempLink.startsWith("http") && !tempText.equals("Cached")) {
                result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink));
                i++;
            }
            break;
        default:
            break;
        }

    }

    return result;

}