List of usage examples for org.jsoup.nodes Element select
public Elements select(String cssQuery)
From source file:de.limod.portals.AutoScout.java
private String getTitle(Element result) { Elements t = result.select(AutoScout.SELECTOR_TITLE); String title = t.text();// www.j a v a 2 s.c o m return title; }
From source file:de.limod.portals.AutoScout.java
/**
 * Reads the link target of a single search result.
 *
 * @param result the result element to look inside
 * @return the {@code href} attribute obtained from the elements matched by
 *         {@code AutoScout.SELECTOR_LINK}
 */
private String getUrl(Element result) {
    return result.select(AutoScout.SELECTOR_LINK).attr("href");
}
From source file:de.geeksfactory.opacclient.apis.IOpac.java
static void parseMediaList(List<LentItem> media, Document doc, JSONObject data) { if (doc.select("a[name=AUS]").size() == 0) return;//from ww w .j a v a 2 s . co m Elements copytrs = doc.select("a[name=AUS] ~ table, a[name=AUS] ~ form table").first().select("tr"); doc.setBaseUri(data.optString("baseurl")); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); int trs = copytrs.size(); if (trs < 2) { return; } assert (trs > 0); JSONObject copymap = new JSONObject(); try { if (data.has("accounttable")) { copymap = data.getJSONObject("accounttable"); } } catch (JSONException e) { } Pattern datePattern = Pattern.compile("\\d{2}\\.\\d{2}\\.\\d{4}"); for (int i = 1; i < trs; i++) { Element tr = copytrs.get(i); LentItem item = new LentItem(); if (copymap.optInt("title", 0) >= 0) { item.setTitle(tr.child(copymap.optInt("title", 0)).text().trim().replace("\u00a0", "")); } if (copymap.optInt("author", 1) >= 0) { item.setAuthor(tr.child(copymap.optInt("author", 1)).text().trim().replace("\u00a0", "")); } if (copymap.optInt("format", 2) >= 0) { item.setFormat(tr.child(copymap.optInt("format", 2)).text().trim().replace("\u00a0", "")); } int prolongCount = 0; if (copymap.optInt("prolongcount", 3) >= 0) { prolongCount = Integer .parseInt(tr.child(copymap.optInt("prolongcount", 3)).text().trim().replace("\u00a0", "")); item.setStatus(String.valueOf(prolongCount) + "x verl."); } if (data.optInt("maxprolongcount", -1) != -1) { item.setRenewable(prolongCount < data.optInt("maxprolongcount", -1)); } if (copymap.optInt("returndate", 4) >= 0) { String value = tr.child(copymap.optInt("returndate", 4)).text().trim().replace("\u00a0", ""); Matcher matcher = datePattern.matcher(value); if (matcher.find()) { try { item.setDeadline(fmt.parseLocalDate(matcher.group())); } catch (IllegalArgumentException e1) { e1.printStackTrace(); } } } if (copymap.optInt("prolongurl", 5) >= 0) { if (tr.children().size() > copymap.optInt("prolongurl", 5)) { Element cell = 
tr.child(copymap.optInt("prolongurl", 5)); if (cell.select("input[name=MedNrVerlAll]").size() > 0) { // new iOPAC Version 1.45 - checkboxes to prolong multiple items // internal convention: We add "NEW" to the media ID to show that we have // the new iOPAC version Element input = cell.select("input[name=MedNrVerlAll]").first(); String value = input.val(); item.setProlongData("NEW" + value); item.setId(value.split(";")[0]); if (input.hasAttr("disabled")) item.setRenewable(false); } else { // previous versions - link for prolonging on every medium String link = cell.select("a").attr("href"); item.setProlongData(link); // find media number with regex Pattern pattern = Pattern.compile("mednr=([^&]*)&"); Matcher matcher = pattern.matcher(link); if (matcher.find() && matcher.group() != null) item.setId(matcher.group(1)); } } } media.add(item); } assert (media.size() == trs - 1); }
From source file:coding.cowboys.scrapers.DvcMagicResalesScraper.java
public List<ResortWrapper> findResorts() { List<ResortWrapper> wrappers = new ArrayList<ResortWrapper>(); Document doc = null;/*from ww w. j a v a 2 s . c om*/ try { doc = Jsoup.connect(SiteUrls.DVC_MAGIC_RESALES).timeout(60000).get(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (doc != null) { for (Element element : doc.select("table#listALL")) { for (Element row : element.select("tr")) { if (row.hasClass("stat-Active")) { ResortWrapper wrapper = new ResortWrapper(); Elements data = row.select("td"); wrapper.setResort(getResortFromText(data.get(0).text())); wrapper.setUseYear(data.get(1).text()); wrapper.setPoints(data.get(2).text()); wrapper.setPricePerPoint(data.get(3).text()); wrapper.setTotalPrice(data.get(4).text()); wrapper.setPointSummary(data.get(0).text().replace(wrapper.getResort(), "")); wrapper.setUrl("http://www.dvcmagicresales.com/dvcmr/resales-all-listings/"); wrappers.add(wrapper); } } } } else { System.out.println("DVC Magic Resales returned null"); } return wrappers; }
From source file:com.isoftstone.proxy.api.sdk.KuaidailiProxySDK.java
/**
 * Extracts proxy entries from the rows matched by {@code #list table tr}.
 *
 * <p>The first matched row is skipped. For each remaining row the first cell is read as the
 * IP and the second as the port. Rows that lack either cell, or whose port is not an integer,
 * are skipped so that one malformed row does not abort the whole parse.
 *
 * @param doc the parsed proxy listing page
 * @return the proxies that could be read, in document order; never {@code null}
 */
private List<ProxyVo> parseHtml(Document doc) {
    List<ProxyVo> proxyList = new ArrayList<ProxyVo>();
    Elements rows = doc.select("#list table tr");
    for (int i = 1; i < rows.size(); i++) {
        Element row = rows.get(i);
        Element ipEle = row.select("td:eq(0)").first();
        Element portEle = row.select("td:eq(1)").first();
        if (ipEle == null || portEle == null) {
            // No <td> cells in this row; nothing to read.
            continue;
        }

        int port;
        try {
            port = Integer.parseInt(portEle.text().trim());
        } catch (NumberFormatException ignored) {
            // Port cell is empty or not numeric; skip this row only.
            continue;
        }

        ProxyVo proxyVo = new ProxyVo();
        proxyVo.setProxyIp(ipEle.text());
        proxyVo.setProxyPort(port);
        proxyList.add(proxyVo);
    }
    return proxyList;
}
From source file:de.dlopes.stocks.facilitator.services.impl.FinanzenNetIndexHTMLExtractorImpl.java
@Override public List<String> getFinanceData(String url, FinanceDataType dataType) { List<String> list = new ArrayList<String>(); try {//from w w w. j a va 2 s.co m Document doc = null; if (url.startsWith("file://")) { File input = new File(url.replaceFirst("file://", "")); doc = Jsoup.parse(input, "UTF-8"); } else { URL input = new URL(url); doc = Jsoup.parse(input, 30000); } //String index = doc.body().select("div#mainWrapper > div.main h1 > a").text(); Elements elements = doc.body().select("#fragIndexBarView > table tr"); for (Element e : elements) { String text = e.select("td > div").text(); // Guard: move on when the text is empty if (StringUtils.isEmpty(text)) { continue; } text = StringUtils.trimAllWhitespace(text); list.add(text); } } catch (IOException e) { e.printStackTrace(); } return list; }
From source file:org.manalith.ircbot.plugin.linuxpkgfinder.PhPortageProvider.java
@Override public String find(String arg) { String result = ""; String url = "http://darkcircle.kr/phportage/phportage.xml?k=" + arg + "&limit=1&similarity=exact" + "&showmasked=true&livebuild=false"; try {//from w w w.j a v a2s. c o m Document d = Jsoup.connect(url).get(); System.out.println(d.select("result>code").get(0).text()); if (NumberUtils.toInt(d.select("result>code").get(0).text()) == 0) { if (NumberUtils.toInt(d.select("result>actualnumofres").get(0).text()) == 0) result = "[Gentoo] "; else { Element e = d.select("result>packages>pkg").get(0); String pkgname = e.select("category").get(0).text() + "/" + e.select("name").get(0).text(); String ver = e.select("version").get(0).text(); String description = e.select("description").get(0).text(); result = "[Gentoo] \u0002" + pkgname + "\u0002 - " + description + ", " + ver; } } } catch (Exception e) { logger.error(e.getMessage(), e); result = ": " + e.getMessage(); } return result; }
From source file:org.brunocvcunha.taskerbox.impl.crawler.CodepadAction.java
/**
 * Walks every {@code .section} element of the page and triggers the action for each paste
 * that {@code canAct} accepts.
 *
 * <p>The paste id is the first link's {@code href} with the {@code http://codepad.org/}
 * prefix removed; the title is the id followed by up to 32 characters of the paste body with
 * line breaks replaced by spaces.
 *
 * @param entry the fetched listing page
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element section : entry.select(".section")) {
        final String href = section.select("a").attr("href");
        final String id = href.replace("http://codepad.org/", "");

        final String flattened = section.select("pre").text().replaceAll("\r?\n", " ");
        final String preview = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;

        if (!canAct(id)) {
            continue;
        }

        final String title = id + " - " + preview;
        addAct(id);
        spreadAction(id, title);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}
From source file:org.brunocvcunha.taskerbox.impl.crawler.PastieAction.java
/**
 * Walks every {@code .pastePreview} element of the page and triggers the action for each
 * paste that {@code canAct} accepts.
 *
 * <p>The paste id is the first link's {@code href} with the
 * {@code http://pastie.org/pastes/} prefix removed; the title is the id followed by up to 32
 * characters of the paste body with line breaks replaced by spaces.
 *
 * @param entry the fetched listing page
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element preview : entry.select(".pastePreview")) {
        final String href = preview.select("a").attr("href");
        final String id = href.replace("http://pastie.org/pastes/", "");

        final String flattened = preview.select("pre").text().replaceAll("\r?\n", " ");
        final String snippet = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;

        if (!canAct(id)) {
            continue;
        }

        final String title = id + " - " + snippet;
        addAct(id);
        spreadAction(id, title);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}
From source file:de.geeksfactory.opacclient.apis.Zones.java
static List<ReservedItem> parseResList(Document doc) { List<ReservedItem> reservations = new ArrayList<>(); for (Element table : doc.select( ".MessageBrowseItemDetailsCell table, " + ".MessageBrowseItemDetailsCellStripe" + " table")) { ReservedItem item = new ReservedItem(); for (Element tr : table.select("tr")) { String desc = tr.select(".MessageBrowseFieldNameCell").text().trim(); String value = tr.select(".MessageBrowseFieldDataCell").text().trim(); if (desc.equals("Titel")) item.setTitle(value);//from w ww .java 2 s . co m if (desc.equals("Publikationsform")) item.setFormat(value); if (desc.equals("Liefern an")) item.setBranch(value); if (desc.equals("Status")) item.setStatus(value); } if ("Gelscht".equals(item.getStatus())) continue; reservations.add(item); } return reservations; }