List of usage examples for org.jsoup.nodes Element attr
public String attr(String attributeKey)
From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java
@Override public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); Document navbarDoc = Jsoup.parse(getNavbarDoc().replace(" ", "")); Element select = navbarDoc.select("select[name=week]").first(); Vertretungsplan v = new Vertretungsplan(); List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>(); String info = navbarDoc.select(".description").text(); String stand;//from w w w . ja v a 2 s . co m try { stand = info.substring(info.indexOf("Stand:")); } catch (Exception e) { stand = ""; } for (Element option : select.children()) { String week = option.attr("value"); String letter = data.optString("letter", "w"); if (data.optBoolean("single_classes", false)) { int classNumber = 1; for (String klasse : getAllClasses()) { String paddedNumber = String.format("%05d", classNumber); String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } writeTagByDatum(tage, tag); } classNumber++; } } else { 
String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } tage.add(tag); } } v.setTage(tage); } return v; }
From source file:me.vertretungsplan.parser.UntisMonitorParser.java
private void loadUrl(String url, String encoding, boolean following, List<Document> docs, String startUrl, int recursionDepth) throws IOException, CredentialInvalidException { String html;/* w ww .jav a 2 s. c o m*/ if (url.equals(VALUE_URL_LOGIN_RESPONSE)) { html = loginResponse; } else { try { html = httpGet(url, encoding).replace(" ", ""); } catch (HttpResponseException e) { if (docs.size() == 0) { throw e; } else { return; // ignore if first page was loaded and redirect didn't work } } } Document doc = Jsoup.parse(html); doc.setBaseUri(url); if (doc.select(".mon_title").size() == 0) { // We have a problem - there seems to be no substitution schedule. Maybe it is hiding // inside a frame? if (doc.select("frameset frame[name").size() > 0) { for (Element frame : doc.select("frameset frame")) { if (frame.attr("src").matches(".*subst_\\d\\d\\d.html?") && recursionDepth < MAX_RECURSION_DEPTH) { String frameUrl = frame.absUrl("src"); loadUrl(frame.absUrl("src"), encoding, following, docs, frameUrl, recursionDepth + 1); } } } else if (doc.text().contains("registriert")) { throw new CredentialInvalidException(); } else { if (docs.size() == 0) { // ignore if first page was loaded and redirect didn't work throw new IOException( "Could not find .mon-title, seems like there is no Untis " + "schedule here"); } } } else { findSubDocs(docs, html, doc); if (following && doc.select("meta[http-equiv=refresh]").size() > 0) { Element meta = doc.select("meta[http-equiv=refresh]").first(); String attr = meta.attr("content").toLowerCase(); String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1) + attr.substring(attr.indexOf("url=") + 4); if (!redirectUrl.equals(startUrl) && recursionDepth < MAX_RECURSION_DEPTH) { loadUrl(redirectUrl, encoding, true, docs, startUrl, recursionDepth + 1); } } } }
From source file:me.vertretungsplan.parser.DSBLightParser.java
/**
 * Fetches one page and, if it looks like an Untis monitor page, parses it into
 * {@code schedule} and follows its meta-refresh redirect until {@code startUrl} is reached.
 *
 * @param url      page to fetch
 * @param referer  passed through to {@code httpGet}
 * @param schedule schedule handed to {@code parseMultipleMonitorDays}
 * @param startUrl URL at which the redirect chain is considered closed
 * @throws IOException                if fetching a page fails
 * @throws JSONException              declared for the configuration/parsing calls
 * @throws CredentialInvalidException declared for the parsing calls
 */
private void parseDay(String url, Map<String, String> referer, SubstitutionSchedule schedule, String startUrl)
        throws IOException, JSONException, CredentialInvalidException {
    String html = httpGet(url, data.optString(PARAM_ENCODING, null), referer);
    Document doc = Jsoup.parse(html);

    // Locale.ROOT: with the default locale, "UNTIS" would not lower-case to "untis"
    // under Turkish casing rules and the page would be skipped.
    boolean looksLikeUntis = doc.title().toLowerCase(java.util.Locale.ROOT).contains("untis")
            || doc.html().toLowerCase(java.util.Locale.ROOT).contains("untis")
            || doc.select(".mon_list").size() > 0;
    if (!looksLikeUntis) {
        return;
    }

    parseMultipleMonitorDays(schedule, doc, data);

    Element meta = doc.select("meta[http-equiv=refresh]").first();
    if (meta == null) {
        return;
    }
    String content = meta.attr("content").toLowerCase(java.util.Locale.ROOT);
    int urlKey = content.indexOf("url=");
    if (urlKey < 0) {
        return; // refresh without a target: nothing to follow
    }
    String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1) + content.substring(urlKey + 4);
    if (!redirectUrl.equals(startUrl)) {
        parseDay(redirectUrl, referer, schedule, startUrl);
    }
}
From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java
/** * Gets the image captions in a map keyed by itemID_attachmentID * The key is saved also in the <img> tag's "alt" attribute for later use from word * @param doc/* ww w . j a v a2 s. co m*/ * @param personID * @param imageCaptionsMap * @return */ private String getImageCaptions(Document doc, Integer personID, Map<String, ImageOrTableCaption> imageCaptionsMap) { Elements imgElements = doc.select("img"); if (imgElements != null) { for (Iterator<Element> iterator = imgElements.iterator(); iterator.hasNext();) { Element imageElement = iterator.next(); String sourceAttribute = imageElement.attr("src"); String style = imageElement.attr("style"); //remove the width and height attributes from html img to avoid java.lang.OutOfMemoryError: Java heap space imageElement.removeAttr("width"); imageElement.removeAttr("height"); ALIGN align = null; if (style != null) { if (style.contains("float:left")) { align = ALIGN.LEFT; } else { if (style.contains("float:right")) { align = ALIGN.RIGHT; } } } String altAttribute = imageElement.attr("alt"); Map<String, String> map = getTemporaryFilePathMap(sourceAttribute, personID); if (map != null) { imageElement.attr("src", map.get("temporaryFilePath")); //save imageCaption into the map and now use the "alt" attribute for storing the merged key //which will be transformed in nonvisualdrawingprops.getDescr() by XHTMLImporterImpl to set the caption on the ms word side String imageCaption = null; if (altAttribute != null && !"".equals(altAttribute)) { //probably from previously removed figcaption but it may also be explicitly set imageCaption = altAttribute; } else { imageCaption = map.get("description"); } globalCounter++; counterWithinChapter++; imageElement.attr("alt", String.valueOf(globalCounter)); if (imageCaption == null) { //add anyway to the map even as empty string because this marks the image to be added to the List of figures imageCaption = ""; } imageCaptionsMap.put(String.valueOf(globalCounter), new ImageOrTableCaption(chapterNo, 
counterWithinChapter, imageCaption, align)); } } } return doc.body().html(); }
From source file:com.shalzz.attendance.activity.LoginActivity.java
private Response.Listener<String> getHiddenDataSuccessListener() { return new Response.Listener<String>() { @Override//w w w . j av a 2s .co m public void onResponse(String response) { Log.i(getClass().getName(), "Collected hidden data."); Document doc = Jsoup.parse(response); Log.i(getClass().getName(), "Parsing hidden data..."); // Get Hidden values Elements hiddenvalues = doc.select("input[type=hidden]"); for (Element hiddenvalue : hiddenvalues) { String name = hiddenvalue.attr("name"); String val = hiddenvalue.attr("value"); if (name.length() != 0 && val.length() != 0) { data.put(name, val); } } Log.i(getClass().getName(), "Parsed hidden data."); } }; }
From source file:me.vertretungsplan.parser.DSBLightParser.java
private void parseProgram(String url, String html, SubstitutionSchedule schedule, Map<String, String> referer, String firstUrl) throws IOException, JSONException, CredentialInvalidException { Document doc = Jsoup.parse(html, url); if (doc.select("iframe").attr("src").equals(firstUrl) || doc.select("iframe").size() == 0) { return;//from w ww . jav a 2s .co m } for (Element iframe : doc.select("iframe")) { // Data parseDay(iframe.attr("src"), referer, schedule, iframe.attr("src")); } if (firstUrl == null) { firstUrl = doc.select("iframe").attr("src"); } if (doc.select("#hlNext").size() > 0) { String nextUrl = doc.select("#hlNext").first().attr("abs:href"); try { String response = httpGet(nextUrl, ENCODING, referer); parseProgram(response, nextUrl, schedule, referer, firstUrl); } catch (HttpResponseException ignored) { } } if (html.contains("Timer1")) { List<Connection.KeyVal> formData = ((FormElement) doc.select("form").first()).formData(); List<NameValuePair> formParams = new ArrayList<>(); for (Connection.KeyVal kv : formData) { formParams.add(new BasicNameValuePair(kv.key(), kv.value())); } formParams.add(new BasicNameValuePair("__EVENTTARGET", "Timer1")); formParams.add(new BasicNameValuePair("__EVENTARGUMENT", "")); String response = httpPost(url, ENCODING, formParams, referer); parseProgram(url, response, schedule, referer, firstUrl); } }
From source file:mg.jerytodik.business.service.impl.JeryTodikSourceServiceImpl.java
/** * {@inheritDoc}//from www . j ava 2 s . c o m */ @Override public void archiveResource(final JeryTodikSource jerytodikSource) throws JerytodikException { validateSource(jerytodikSource); try { String principalResourceContent = getPrincipalResourceContent(jerytodikSource); String rootResourceFolderName = createRootResourceFolderName(jerytodikSource.getUrl()); final Elements cssLinks = getCssLinks(jerytodikSource.getUrl()); rootResourceFolderName += File.separator + createSubFolderName(); // Archiver les fichiers css relatifs a la page d'acceuil ... final String resourceFolderName = rootResourceFolderName + File.separator + archiveResourceFolderName; archiveResourceFolderName = archiveResourceFolderName + JerytodikUtil.SLASH_CHAR; LOGGER.info(JerytodikUtil.LINE); LOGGER.info("Archiving resources from {} ...", jerytodikSource.getUrl()); LOGGER.info(JerytodikUtil.LINE); for (Element link : cssLinks) { final String resourceUrl = link.attr("abs:href"); if (resourceUrl.toLowerCase().contains("css")) { final String resourceFileName = resourceUrl .substring(resourceUrl.lastIndexOf(JerytodikUtil.SLASH_CHAR) + 1); principalResourceContent = principalResourceContent.replace(link.attr("href"), archiveResourceFolderName + resourceFileName); JeryTodikSource cssSource = new JeryTodikSource(); cssSource.setUrl(resourceUrl); cssSource.setName(resourceFileName); final String cssSourceContent = getPrincipalResourceContent(cssSource); LOGGER.info("\t-{}", resourceFileName); writeInFile(resourceFolderName, cssSourceContent, resourceFileName); } } // Archiver la page d'acceuil ... LOGGER.info("\t-{}", welcomeFileName); writeInFile(rootResourceFolderName, principalResourceContent, welcomeFileName); addHistory(HistoryUtil.ARCHIVE_OK, jerytodikSource); } catch (IOException e) { addHistory(HistoryUtil.ARCHIVE_KO, jerytodikSource); throw new JerytodikException(e.getMessage()); } }
From source file:br.ufsc.das.gtscted.shibbauth.Connection.java
public String authenticate(String wayfLocation, String wayfActionPath, String idpUrl, String username, String password) throws ClientProtocolException, IOException { //POST para o WAYF passando o idp escolhido HttpPost httpPost1 = new HttpPost(wayfLocation + wayfActionPath); List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(); nameValuePairs.add(new BasicNameValuePair("user_idp", idpUrl)); httpPost1.setEntity(new UrlEncodedFormEntity(nameValuePairs, HTTP.UTF_8)); HttpResponse response1 = httpClient.execute(httpPost1); String strResponse1 = readResponse(response1.getEntity().getContent()).toString(); //----------------------------------------------- //Obtm o caminho indicado no campo "action" da pgina do idp (/idp/Authn/UserPassword) Document idpDoc = Jsoup.parse(strResponse1); Element idpFormElement = idpDoc.select("form").get(0); String idpActionPath = idpFormElement.attr("action"); //POST para o idp escolhido (por exemplo https://idpstcfed.sj.ifsc.edu.br/idp/Authn/UserPassword) // passando o usuario (j_username) e a senha (j_password) HttpPost httpPost2 = new HttpPost(idpUrl.replace("/idp/shibboleth", idpActionPath)); List<NameValuePair> nameValuePairs2 = new ArrayList<NameValuePair>(); nameValuePairs2.add(new BasicNameValuePair("j_username", username)); nameValuePairs2.add(new BasicNameValuePair("j_password", password)); httpPost2.setEntity(new UrlEncodedFormEntity(nameValuePairs2, HTTP.UTF_8)); HttpResponse response2 = httpClient.execute(httpPost2); String strResponse2 = readResponse(response2.getEntity().getContent()).toString(); //----------------------------------------------- // Obtm os elementos que sero passados para o SP para criar o security context Document authResponseDoc = Jsoup.parse(strResponse2); Element authResponseFormElement = authResponseDoc.select("form").get(0); Element relayStateElement = authResponseDoc.select("input").get(0); Element SAMLResponseElement = authResponseDoc.select("input").get(1); String action = 
authResponseFormElement.attr("action"); String relayStateValue = relayStateElement.attr("value"); String SAMLResponseValue = SAMLResponseElement.attr("value"); // POST para o "assertion consumer" no SP, indicado no campo "action" da resposta // recebida aps a autenticacao. Este POST contm dois valores: RelayState e // SAMLResponse. HttpPost httpPost3 = new HttpPost(action); List<NameValuePair> nameValuePairs3 = new ArrayList<NameValuePair>(); nameValuePairs3.add(new BasicNameValuePair("RelayState", relayStateValue)); nameValuePairs3.add(new BasicNameValuePair("SAMLResponse", SAMLResponseValue)); httpPost3.setEntity(new UrlEncodedFormEntity(nameValuePairs3, HTTP.UTF_8)); HttpResponse response3 = httpClient.execute(httpPost3); return readResponse(response3.getEntity().getContent()).toString(); }
From source file:com.liato.bankdroid.banking.banks.PayPal.java
@Override protected LoginPackage preLogin() throws BankException, ClientProtocolException, IOException { urlopen = new Urllib(context, CertificateReader.getCertificates(context, R.raw.cert_paypal)); urlopen.setUserAgent(//from ww w. j a va 2 s .co m "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36"); //Get cookies and url to post to response = urlopen.open("https://www.paypal.com/en"); Document d = Jsoup.parse(response); Element e = d.select("form[name=login_form]").first(); String strPostUrl; if (e != null && !TextUtils.isEmpty(e.attr("action"))) { strPostUrl = e.attr("action").trim(); } else { throw new BankException(res.getText(R.string.unable_to_find).toString() + " post url."); } List<NameValuePair> postData = new ArrayList<NameValuePair>(); postData.add(new BasicNameValuePair("login_email", username)); postData.add(new BasicNameValuePair("login_password", password)); postData.add(new BasicNameValuePair("target_page", "0")); postData.add(new BasicNameValuePair("submit.x", "Log In")); postData.add(new BasicNameValuePair("form_charset", "UTF-8")); postData.add(new BasicNameValuePair("browser_name", "undefined")); postData.add(new BasicNameValuePair("browser_version", "undefined")); postData.add(new BasicNameValuePair("operating_system", "Windows")); postData.add(new BasicNameValuePair("bp_mid", "v=1;a1=na~a2=na~a3=na~a4=Mozilla~a5=Netscape~a6=5.0 (Windows; en-US)~a7=20100713~a8=na~a9=true~a10=Windows NT 6.1~a11=true~a12=Win32~a13=na~a14=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.7) Gecko/20100713 Firefox/3.6.7 ( .NET CLR 3.5.30729; .NET4.0C)~a15=true~a16=en-US~a17=na~a18=www.paypal.com~a19=na~a20=na~a21=na~a22=na~a23=1280~a24=720~a25=24~a26=658~a27=na~a28=Sun Oct 31 2010 18:41:07 GMT 0100~a29=1~a30=def|qt1|qt2|qt3|qt4|qt5|qt6|swf|~a31=yes~a32=na~a33=na~a34=no~a35=no~a36=yes~a37=no~a38=online~a39=no~a40=Windows NT 6.1~a41=no~a42=no~")); postData.add(new BasicNameValuePair("bp_ks1", 
"v=1;l=16;Di0:2663Di1:48Ui0:15Ui1:81Di2:176Di3:48Ui2:32Ui3:96Di4:384Ui4:48Di5:352Ui5:48Di6:128Ui6:80Di7:112Ui7:48Di8:113Ui8:79Di9:125Ui9:51Di10:98Ui10:72Di11:227Ui11:51Di12:80Ui12:80Di13:128Ui13:64Di14:48Ui14:80Di15:416Ui15:80")); postData.add(new BasicNameValuePair("bp_ks2", "")); postData.add(new BasicNameValuePair("bp_ks3", "")); postData.add(new BasicNameValuePair("flow_name", "xpt/Marketing_CommandDriven/homepage/IndividualsHome")); postData.add(new BasicNameValuePair("fso", "k2TDENTlxEJnhbuYDYFmKMyVq0kUZPsdK6j3V1gPUwuZvyAmzzpRs4Cmjet0z19AwlxXfW")); return new LoginPackage(urlopen, postData, response, strPostUrl); }
From source file:com.mycompany.searchengineaggregator.SearchEngineAggregator.java
/**
 * Runs {@code query} against the given search engine and returns the result links found
 * on the first result page.
 *
 * <p>Google results are the links starting with "/url?q=" (with that prefix removed);
 * Yahoo and Bing results are the links starting with "http". Links labelled "Cached"
 * are skipped. Results are numbered from 1 in document order.
 *
 * @param query        search terms, appended to the engine's search URL as given
 * @param searchEngine engine to query
 * @return the results, or {@code null} if no user agent is available for the engine or
 *         the page could not be fetched
 * @throws JSONException if a result cannot be converted to JSON
 */
public ArrayList<JSONObject> getSearchResults(String query, SearchEngine searchEngine) throws JSONException {
    ArrayList<JSONObject> result = new ArrayList<>();

    // Handles specific search engine connection details
    StringBuilder requestUrl = new StringBuilder();
    String agent = null;
    switch (searchEngine) {
        case Google:
            requestUrl.append("https://www.google.com/search?q=").append(query);
            agent = GoogleUserAgent;
            break;
        case Yahoo:
            requestUrl.append("https://search.yahoo.com/search?q=").append(query);
            agent = YahooUserAgent;
            break;
        case Bing:
            requestUrl.append("http://www.bing.com/search?q=").append(query);
            agent = BingUserAgent;
            break;
    }
    if (agent == null) {
        return null;
    }

    // Creates a connection, and fetches and parses the HTML file
    // NOTE(review): the extra "query" parameter and the "auth" cookie look like leftovers
    // from example code - confirm whether they are intended.
    Document page = null;
    try {
        page = Jsoup.connect(requestUrl.toString())
                .data("query", "Java")
                .userAgent(agent)
                .cookie("auth", "token")
                .timeout(3000)
                .get();
    } catch (IOException ex) {
        Logger.getLogger(SearchEngineAggregator.class.getName()).log(Level.SEVERE, null, ex);
    }
    if (page == null) {
        return null;
    }

    // Get all links with attribute href
    Elements anchors = page.select("a[href]");
    int rank = 1;

    // For every link, decide per engine whether it is a result and what its target is
    for (Element anchor : anchors) {
        String href = anchor.attr("href");
        String label = anchor.text();

        String target = null;
        switch (searchEngine) {
            case Google:
                if (href.startsWith("/url?q=") && !label.equals("Cached")) {
                    target = href.replace("/url?q=", "");
                }
                break;
            case Yahoo:
            case Bing:
                if (href.startsWith("http") && !label.equals("Cached")) {
                    target = href;
                }
                break;
        }

        if (target != null) {
            result.add(toSearchResultJSONObject(searchEngine.toString(), rank, label, target));
            rank++;
        }
    }

    return result;
}