List of usage examples for org.jsoup.nodes Element select
public Elements select(String cssQuery)
From source file:org.confab.PhpBB3Parser.java
/**
 * Posts a new thread to the given forum.
 *
 * <p>Fetches the forum's {@code newthread.php} page, copies the hidden form
 * fields of the posting form and submits them together with the message in
 * an HTTP POST. An {@link IOException} raised while talking to the server is
 * printed to standard output and not propagated.
 *
 * <p>The sanity checks in this method are plain {@code assert} statements,
 * so they are only evaluated when the JVM runs with assertions enabled.
 *
 * @param targetForum forum to post into; its {@code rootURL()} and
 *                    {@code id} are used to build the request URLs
 * @param newPost     post whose {@code message} is sent as the thread body
 * @param user        user whose {@code httpContext} is used for both requests
 */
public void postForumThread(Forum targetForum, Post newPost, User user) {
    Utilities.debug("postForumThread");
    try {
        String reply_page = targetForum.rootURL() + "newthread.php?do=newthread&f=" + targetForum.id;
        Utilities.debug("GET: " + reply_page);
        HttpGet httpget = new HttpGet(reply_page);
        HttpResponse response = httpclient.execute(httpget, user.httpContext);
        HttpEntity entity = response.getEntity();
        Document page = Jsoup.parse(EntityUtils.toString(entity));
        EntityUtils.consume(entity);
        assert page != null;

        // TODO: need check to make sure we're on the right page. HttpEntity's
        // can just contain garbage and jsoup will still consume it

        // Make sure we're logged in before going any further: the login
        // inputs must be absent from the page.
        Element username_box = page.select("input[name=vb_login_username]").first();
        assert username_box == null;
        Element password_box = page.select("input[name=vb_login_password]").first();
        assert password_box == null;

        // Construct POST
        HttpPost httppost = new HttpPost(targetForum.rootURL() + "newthread.php");
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        // TODO: fix subject
        nvps.add(new BasicNameValuePair("subject", "hello world"));
        nvps.add(new BasicNameValuePair("message", newPost.message));

        // Find the form - we can parse the rest of the needed elements from it
        Element reply_form = page.select("form[action*=newthread.php?do=postthread&f=]").first();
        assert reply_form != null;

        String[] vals_array = { "s", "securitytoken", "f", "do", "posthash", "poststarttime",
                "loggedinuser" };
        for (String val : vals_array) {
            Element el = reply_form.select("input[name=" + val + "]").first();
            assert el != null : val;
            nvps.add(new BasicNameValuePair(val, el.attr("value")));
        }
        httppost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

        // Execute the POST
        Utilities.debug("Executing POST");
        response = httpclient.execute(httppost, user.httpContext);
        try {
            Utilities.debug("POST response: " + response.getStatusLine());
            assert response.getStatusLine().getStatusCode() == 302;
        } finally {
            // Drain the response body so the connection held by this
            // response is released; it was previously left unconsumed.
            EntityUtils.consume(response.getEntity());
        }
    } catch (IOException e) {
        System.out.println(e);
    }
    Utilities.debug("end postForumThread");
}
From source file:Leitura.Ecobertura.java
public void escreveTxt() throws IOException { //mtodo para pegar os nomes dos mtodos declarados String auxLinha = null;/* www.jav a 2 s. c om*/ char aux[] = null; StringBuffer sbClasse = new StringBuffer(); StringBuffer sbLinha = new StringBuffer(); StringBuffer sbMetodo = new StringBuffer(); String metodoTemp; boolean controleClasse = false; // Pega somente os elementos com tag "tr" Elements elements = document.getElementsByTag("tr"); for (Element children : elements) { if (StringUtils.isBlank(children.text())) { continue; } children.getElementsByClass("comment").remove(); // System.out.println(children.text()); //----------------- Dispensa Comentrios ----------------- //auxLinha = children.getElementsByTag("span").eq(0).text(); /*if (auxLinha.contains("/*")) { comentario = true; } else if(auxLinha.contains("//")){ comentario = true; controle = true; // controla comentrio com // } if (auxLinha.contains("*//*")) { comentario = false; }else if(auxLinha.contains("\n") && controle == true){ comentario = false; controle = false; }*/ //------------------ Fim dispensa comentrios -------------- // if (comentario == false) { //--------------------- verifica as linhas do cdigo ------------------- if (StringUtils.isNotBlank(children.getElementsByClass("numLine").text())) { aux = children.getElementsByClass("numLine").text().toCharArray(); for (int i = 0; i < aux.length; i++) { //System.out.println("["+aux[i]+"]"); if (aux[i] >= 48 && aux[i] <= 57) { // pega o nmero da linha sbLinha.append(aux[i]); } } auxLinha = sbLinha.toString(); if (StringUtils.isNotBlank(auxLinha)) { // transforma a linha para inteiro qtdeLinhas = Integer.parseInt(auxLinha); } sbLinha.delete(0, sbLinha.length()); } // ------------------- Fim linhas --------------------------------- Elements pre = children.getElementsByTag("pre"); for (Element element : pre) { String tagMetodo = element.getElementsByTag("span").eq(0).text(); //------------------------- Verifica classe ------------------------- if 
(element.getElementsByTag("span").text().contains("class")) { element.select("span.keyword").remove(); if (controleClasse == false) { classe = element.text().trim(); aux = classe.toCharArray(); for (int j = 0; j < aux.length; j++) { if ((65 <= aux[j]) && (aux[j] <= 90) || (aux[j] >= 97) && (aux[j] <= 122) || (aux[j] == 95)) { sbClasse.append(aux[j]); //System.out.println(j + ", " + sbClasse); if (j < aux.length - 1) { // System.out.println("size: "+aux.length+" j: "+j); if ((aux[j + 1] == ' ') || (aux[j + 1] == '{') || (aux[j + 1] == '<')) { // System.out.println("entrei"); if ((j + 1) < aux.length - 1) { for (int k = j++; k < aux.length; k++) { aux[k] = ' '; } } } } } } excluiLinhas.add(qtdeLinhas); classe = sbClasse.toString().replaceAll("\r", "").replaceAll("\t", "").replaceAll("\n", ""); controleClasse = true; } // System.out.println("Classe: " + classe); } //------------------------------- Fim verifica classe------------------------------ //------------------------------ Verifica mtodo ---------------------------------- //else if (tagMetodo.equals("privtate") || tagMetodo.equals("public") || tagMetodo.equals("protected")) { else if (element.getElementsByTag("span").text().contains("privtate") || element.getElementsByTag("span").text().contains("public") || element.getElementsByTag("span").text().contains("protected") || element.getElementsByTag("span").text().contains("static") || element.getElementsByTag("span").text().contains("final") || element.getElementsByTag("span").text().contains("native") || element.getElementsByTag("span").text().contains("synchronized") || element.getElementsByTag("span").text().contains("abstract") || element.getElementsByTag("span").text().contains("threadsafe") || element.getElementsByTag("span").text().contains("transient")) { element.select("span.keyword").remove(); if (!element.text().contains("=") && !element.text().contains(".") && !element.text().contains("@")) { String[] s = element.text().split(" "); for (int i = 0; i < 
s.length; i++) { if (s[i].contains("(")) { aux = s[i].toCharArray(); for (int j = 0; j < aux.length; j++) { if (aux[j] == '(') { for (int k = j; k < aux.length; k++) { aux[k] = ' '; } break; } sbMetodo.append(aux[j]); } metodoTemp = sbMetodo.toString(); if (!metodoTemp.isEmpty()) { metodo = metodoTemp.replaceAll("\r", "").replaceAll("\t", "").replaceAll("\n", ""); sbMetodo.delete(0, aux.length); informacoes = new Informacoes(classe, metodo, Integer.parseInt(auxLinha)); inf.add(informacoes); } } } } } // --------------------------- Fim Verifica Mtodo ------------------------------------ } // } } /* for(int i=0; i<inf.size(); i++){ System.out.println("Classe:"+inf.get(i).getClasse()+" Metodo:"+inf.get(i).getMetodo()+" Linha: "+inf.get(i).getLinha()); } // /* for(Map.Entry<String,Informacoes> entry : inf.entrySet()) { String key = entry.getKey(); int value = entry.getValue().getLinha(); String metodov = entry.getValue().getMetodo(); String classev = entry.getValue().getClasse(); System.out.println(key + " => " + classev+ " => " +metodov+ " => " +value); }*/ }
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
/**
 * Builds the title-to-link map from the table of contents of the current
 * Swagger document.
 *
 * <p>Category links are the first child of each child of the first
 * {@code .sectlevel1} element. Individual links are the anchors inside each
 * {@code .sectlevel2} element, registered with a 1-based counter that
 * follows the order of those elements.
 *
 * @return map filled by {@code addLinksByType} for both page types
 */
private Map<String, ConfluenceLink> buildTableOfContentsLinkMap() {
    final Elements toc = SWAGGER_DOCUMENT.get().select(".toc");

    // Collect the link element of every top-level TOC entry.
    final Elements categoryLinks = new Elements();
    for (final Element categoryEntry : toc.select(".sectlevel1").first().children()) {
        categoryLinks.add(categoryEntry.children().first());
    }

    final Map<String, ConfluenceLink> linksByTitle = new HashMap<>();
    addLinksByType(linksByTitle, categoryLinks, PageType.CATEGORY, null);

    // Each .sectlevel2 list is registered under its 1-based position.
    final Elements individualSections = toc.select(".sectlevel2");
    for (int index = 0; index < individualSections.size(); index++) {
        final Elements sectionLinks = individualSections.get(index).select("a");
        addLinksByType(linksByTitle, sectionLinks, INDIVIDUAL, index + 1);
    }

    return linksByTitle;
}
From source file:org.confab.PhpBB3Parser.java
public void createPost(Post replyTo, Post newPost, User user) { Utilities.debug("createPost"); try {/* w ww .java 2 s. c o m*/ String reply_page = replyTo.rootURL() + "newreply.php?do=newreply&noquote=1&p=" + replyTo.id; HttpGet httpget = new HttpGet(reply_page); HttpResponse response = httpclient.execute(httpget, user.httpContext); HttpEntity entity = response.getEntity(); Document page = Jsoup.parse(EntityUtils.toString(entity)); EntityUtils.consume(entity); assert page != null; // TODO: need check to make sure we're on the right page. HttpEntity's // can just contain garbage and jsoup will still consume it // Make sure we're logged in before going any further Element username_box = page.select("input[name=vb_login_username]").first(); assert username_box == null; Element password_box = page.select("input[name=vb_login_password]").first(); assert password_box == null; // Construct POST HttpPost httppost = new HttpPost(replyTo.rootURL() + "newreply.php"); List<NameValuePair> nvps = new ArrayList<NameValuePair>(); // There is a title param but think it's optional.. 
//nvps.add(new BasicNameValuePair("title", ""); nvps.add(new BasicNameValuePair("message", newPost.message)); // Find the form - we can parse the rest of the needed elements from it Element reply_form = page.select("form[action*=newreply.php?do=postreply&t=]").first(); assert reply_form != null; String[] vals_array = { "s", "securitytoken", "do", "t", "p", "specifiedpost", "posthash", "poststarttime", "loggedinuser", "multiquoteempty" }; List<String> vals = Arrays.asList(vals_array); for (String val : vals) { Element el = reply_form.select("input[name=" + val + "]").first(); assert el != null : val; nvps.add(new BasicNameValuePair(val, el.attr("value"))); } httppost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); // Execute the POST Utilities.debug("Executing POST"); response = httpclient.execute(httppost, user.httpContext); Utilities.debug("POST response: " + response.getStatusLine()); assert response.getStatusLine().getStatusCode() == 302; } catch (IOException e) { System.out.println(e); } Utilities.debug("end createPost"); }
From source file:me.vertretungsplan.parser.UntisCommonParser.java
/** * Parses a "Nachrichten zum Tag" ("daily news") table from an Untis schedule * * @param table the <code>table</code>-Element to be parsed * @param day the {@link SubstitutionScheduleDay} where the messages should be stored *//*w w w . ja va 2s .c o m*/ private void parseMessages(Element table, SubstitutionScheduleDay day) { Elements zeilen = table.select("tr:not(:contains(Nachrichten zum Tag))"); for (Element i : zeilen) { Elements spalten = i.select("td"); String info = ""; for (Element b : spalten) { info += "\n" + TextNode.createFromEncoded(b.html(), null).getWholeText(); } info = info.substring(1); // remove first \n day.addMessage(info); } }
From source file:de.geeksfactory.opacclient.apis.Heidi.java
@Override public DetailledItem getResultById(String id, final String homebranch) throws IOException { if (sessid == null) { start();//from www.java 2s. co m } // Homebranch if (homebranch != null && !"".equals(homebranch)) { cookieStore.addCookie(new BasicClientCookie("zweig", homebranch)); } String html = httpGet(opac_url + "/titel.cgi?katkey=" + id + "&sess=" + sessid, ENCODING, false, cookieStore); Document doc = Jsoup.parse(html); DetailledItem item = new DetailledItem(); item.setId(id); Elements table = doc.select(".titelsatz tr"); for (Element tr : table) { if (tr.select("th").size() == 0 || tr.select("td").size() == 0) { continue; } String d = tr.select("th").first().text(); String c = tr.select("td").first().text(); if (d.equals("Titel:")) { item.setTitle(c); } else if ((d.contains("URL") || d.contains("Link")) && tr.select("td a").size() > 0) { item.addDetail(new Detail(d, tr.select("td a").first().attr("href"))); } else { item.addDetail(new Detail(d, c)); } } if (doc.select(".ex table tr").size() > 0) { table = doc.select(".ex table tr"); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); for (Element tr : table) { if (tr.hasClass("exueber") || tr.select(".exsig").size() == 0 || tr.select(".exso").size() == 0 || tr.select(".exstatus").size() == 0) { continue; } Copy copy = new Copy(); copy.setShelfmark(tr.select(".exsig").first().text()); copy.setBranch(tr.select(".exso").first().text()); String status = tr.select(".exstatus").first().text(); if (status.contains("entliehen bis")) { copy.setReturnDate(fmt.parseLocalDate(status.replaceAll("entliehen bis ([0-9.]+) .*", "$1"))); copy.setReservations(status.replaceAll(".*\\(.*Vormerkungen: ([0-9]+)\\)", "$1")); copy.setStatus("entliehen"); } else { copy.setStatus(status); } item.addCopy(copy); } } for (Element a : doc.select(".status1 a")) { if (a.attr("href").contains("bestellung.cgi")) { item.setReservable(true); item.setReservation_info(id); break; } } for (Element a 
: doc.select(".titelsatz a")) { if (a.text().trim().matches("B.+nde")) { Map<String, String> volumesearch = new HashMap<>(); volumesearch.put("query", getQueryParamsFirst(a.attr("href")).get("query")); item.setVolumesearch(volumesearch); } } return item; }
From source file:com.adarshahd.indianrailinfo.donate.PNRStat.java
/**
 * Builds the one-row table that shows the train details of the current PNR.
 *
 * <p>If the fetched page reports an invalid or expired PNR, or a server
 * problem, the status text view is shown instead and the method returns.
 * Otherwise the train details are read from the third row of the table that
 * contains the "Train Number" cell (columns 0, 1, 2, 5, 6 and 7), or reused
 * from {@code mTrainDetails}. The values are rendered as text views and also
 * concatenated, space separated, into {@code mStrTrainDetails}.
 *
 * <p>A table with fewer than three rows, or a third row with fewer than
 * eight cells, now yields an empty detail list instead of throwing.
 */
private void createTableLayoutTrnDtls() {
    if (mPageResult.contains("FLUSHED PNR / ") || mPageResult.contains("Invalid PNR")) {
        mTextViewPNRSts.setText("The PNR entered is either invalid or expired! Please check.");
        mFrameLayout.removeAllViews();
        mFrameLayout.addView(mTextViewPNRSts);
        return;
    }
    if (mPageResult.contains("Connectivity Failure") || mPageResult.contains("try again")) {
        mTextViewPNRSts.setText("Looks like server is busy or currently unavailable. Please try again later!");
        mFrameLayout.removeAllViews();
        mFrameLayout.addView(mTextViewPNRSts);
        return;
    }

    List<String> trainList;
    // NOTE(review): if getPNR() and mPNRNumber are Strings, '!=' compares
    // references and the cached details are never reused - confirm the types.
    if (mTrainDetails == null || mTrainDetails.getPNR() != mPNRNumber) {
        Elements eleTrain = Jsoup.parse(mPageResult).select("table tr tr td:containsOwn(Train Number)");

        // Climb three levels up from the "Train Number" cell; a missing cell
        // or ancestor means the page does not have the expected layout.
        Element container = eleTrain.first();
        for (int level = 0; level < 3 && container != null; level++) {
            container = container.parent();
        }
        if (container == null) {
            Log.i("PNRStat", mPageResult);
            return;
        }

        trainList = new ArrayList<String>();
        Elements rows = container.getElementsByTag("tr");
        // The third row holds the train details.
        if (rows.size() > 2) {
            Elements cells = rows.get(2).select("td");
            if (cells.size() > 7) {
                for (int column : new int[] { 0, 1, 2, 5, 6, 7 }) {
                    trainList.add(cells.get(column).text());
                }
            }
        }
        mTrainDetails = new TrainDetails(trainList, mPNRNumber);
    } else {
        trainList = mTrainDetails.getTrainDetails();
    }

    mTableLayoutTrn = new TableLayout(mActivity);
    mTableLayoutTrn.setLayoutParams(new FrameLayout.LayoutParams(ViewGroup.LayoutParams.MATCH_PARENT,
            ViewGroup.LayoutParams.WRAP_CONTENT));
    TableRow row = new TableRow(mActivity);
    row.setLayoutParams(new FrameLayout.LayoutParams(ViewGroup.LayoutParams.MATCH_PARENT,
            ViewGroup.LayoutParams.WRAP_CONTENT));

    StringBuilder details = new StringBuilder();
    for (String value : trainList) {
        TextView tv = new TextView(mActivity);
        tv.setText(value);
        tv.setPadding(10, 10, 10, 10);
        tv.setTextAppearance(mActivity, android.R.style.TextAppearance_DeviceDefault_Small);
        row.addView(tv);
        details.append(value).append(" ");
    }
    mStrTrainDetails = details.toString();

    row.setBackgroundResource(R.drawable.card_background);
    row.setGravity(Gravity.CENTER_HORIZONTAL | Gravity.CENTER_VERTICAL);
    mTableLayoutTrn.addView(row);
}
From source file:me.vertretungsplan.parser.IndiwareParser.java
void parseIndiwarePage(SubstitutionSchedule v, String response) throws JSONException, IOException { boolean html; Element doc; if (response.contains("<html") || response.contains("<table")) { html = true;/*from w ww. j a v a 2 s . c o m*/ doc = Jsoup.parse(response); } else { html = false; doc = Jsoup.parse(response, "", Parser.xmlParser()); } if (html && data.has(PARAM_EMBEDDED_CONTENT_SELECTOR)) { String selector = data.getString(PARAM_EMBEDDED_CONTENT_SELECTOR); Elements elems = doc.select(selector); if (elems.size() == 0) throw new IOException("No elements found using " + selector); for (Element elem : elems) { v.addDay(parseIndiwareDay(elem, true)); } } else if (html && doc.select(".vpfuer").size() > 1) { // multiple schedules after each other on one page String[] htmls = doc.html().split("<span class=\"vpfuer\">"); for (int i = 1; i < htmls.length; i++) { Document splitDoc = Jsoup.parse(htmls[i]); v.addDay(parseIndiwareDay(splitDoc, true)); } } else { v.addDay(parseIndiwareDay(doc, html)); } }
From source file:com.decker.parkingSearch.receiver.ParkingContentReceiver.java
public void fetch() throws IOException { Document doc = Jsoup.connect(this.baseUrl).get(); Elements detailBox = doc.select("td[style=\"vertical-align:top;\"]"); for (Element es : detailBox) { try {//from w ww . j av a2 s.com Park detail = new Park(); detail.name = es.childNode(0) instanceof Element ? ((Element) es.childNode(0)).text() : ""; detail.address = ""; for (int i = 1; i < es.childNodes().size() - 1; i++) { String content = (es.childNodes().get(i)).toString(); if (content.equals((es.childNodes().get(i + 1)).toString())) { break; } else { if (!content.equals("<br>")) { detail.address += (StringEscapeUtils.unescapeHtml(content) + " "); } } } if (es.select("span > a").size() == 0) { continue; } String secretContent = StringEscapeUtils .unescapeHtml(es.select("span > a").get(0).attr("href").replaceAll("\"", "")); Matcher matcher = Pattern.compile("(?<=javascript\\:count\\().*(?=\\))").matcher(secretContent); String[] secretInfoList; if (matcher.find()) { secretInfoList = matcher.group().split(","); } else { continue; } String mobContent = Jsoup .connect(String.format("http://www.goseeaustralia.com.au/statslookup.asp?keyID=%s&StatID=0", secretInfoList[1])) .get().text(); detail.mobileNumber = StringUtils.isNotBlank(mobContent) ? mobContent : ""; String phoneContent = Jsoup .connect(String.format("http://www.goseeaustralia.com.au/statslookup.asp?keyID=%s&StatID=1", secretInfoList[1])) .get().text(); detail.phoneNumber = StringUtils.isNotBlank(phoneContent) ? phoneContent : ""; String faxContent = Jsoup .connect(String.format("http://www.goseeaustralia.com.au/statslookup.asp?keyID=%s&StatID=2", secretInfoList[1])) .get().text(); detail.faxNumber = StringUtils.isNotBlank(faxContent) ? faxContent : ""; detail.email = StringUtils.isNotBlank(secretInfoList[2]) ? 
secretInfoList[2] : ""; this.info.parks.add(detail); } catch (Exception ex) { System.out.printf("Error during fetch %s park with url %s %n", es.childNode(0).toString(), this.baseUrl); ex.printStackTrace(); } } }
From source file:me.vertretungsplan.parser.UntisCommonParser.java
/**
 * Assigns a type and matching colour to a substitution that has none yet.
 *
 * <p>The type is "Entfall" when auto-detection is enabled (the default) and
 * either the row contains struck-through text while subject and teacher
 * satisfy {@code equalsOrNull} against their previous values, or subject,
 * room and teacher are all unset while a previous subject exists. In every
 * other case the type is "Vertretung". A substitution that already has a
 * type is left untouched.
 *
 * @param data  configuration holding the auto-detection switch
 * @param zeile table row the substitution was read from
 * @param v     substitution to update
 */
private void autoDetectType(JSONObject data, Element zeile, Substitution v) {
    if (v.getType() != null) {
        return;
    }

    String type = "Vertretung";
    if (data.optBoolean(PARAM_TYPE_AUTO_DETECTION, true)) {
        boolean struckOutUnchanged = zeile.select("strike").size() > 0
                && equalsOrNull(v.getSubject(), v.getPreviousSubject())
                && equalsOrNull(v.getTeacher(), v.getPreviousTeacher());
        boolean nothingLeft = !struckOutUnchanged
                && v.getSubject() == null
                && v.getRoom() == null
                && v.getTeacher() == null
                && v.getPreviousSubject() != null;
        if (struckOutUnchanged || nothingLeft) {
            type = "Entfall";
        }
    }

    v.setType(type);
    v.setColor(colorProvider.getColor(type));
}