List of usage examples for org.jsoup.nodes.Document.select
public Elements select(String cssQuery)
From source file:com.example.amazon.mw.exempli.ExempliClient.java
/**
 * Collects the IMDb parental-guide entries for a video.
 *
 * <p>The ASIN is mapped to an IMDb title id through {@code asinToImdbId}. The
 * parental-guide page of that title is downloaded and the text of every element
 * matching {@code .display p} is returned in document order.
 *
 * @param videoAsin ASIN of the video to look up
 * @return the guide texts; empty (never {@code null}) when no IMDb id is mapped
 *         for the ASIN or the page could not be downloaded
 */
public ArrayList<String> getImdbData(String videoAsin) {
    ArrayList<String> guides = new ArrayList<String>();
    try {
        String imdbId = asinToImdbId.get(videoAsin);
        Log.e("Asin: ", videoAsin);
        if (imdbId == null) {
            // Without an id the request would go to ".../title/null/parentalguide";
            // skip the network round trip instead.
            Log.e("exception", "No IMDb id mapped for ASIN " + videoAsin);
            return guides;
        }

        // Build the URL once so the logged address is exactly the one requested.
        String guideUrl = "http://www.imdb.com/title/" + imdbId + "/parentalguide";
        Log.e("Parent guide page: ", guideUrl);

        Document doc = Jsoup.connect(guideUrl).get();
        for (Element element : doc.select(".display p")) {
            String text = element.text();
            guides.add(text);
            Log.e("retrieved guide: ", text);
        }
    } catch (IOException e) {
        Log.e("exception", "Client failure: ", e);
    }
    return guides;
}
From source file:com.decker.parkingSearch.receiver.ParkingContentReceiver.java
/**
 * Downloads the page at {@code baseUrl} and adds one {@code Park} to
 * {@code info.parks} for every matching detail cell that carries a contact link.
 *
 * <p>For each {@code td[style="vertical-align:top;"]} cell:
 * <ul>
 *   <li>cells without a {@code span > a} link, or whose link is not a
 *       {@code javascript:count(...)} call, are skipped;</li>
 *   <li>the name is the text of the first child node when it is an element;</li>
 *   <li>the address is assembled from the following child nodes;</li>
 *   <li>mobile, phone and fax numbers are fetched with one extra request each,
 *       keyed by the second argument of the {@code count(...)} call;</li>
 *   <li>the e-mail is the third argument, or empty when absent or blank.</li>
 * </ul>
 * A failure while processing one cell is reported on standard output and does not
 * stop the remaining cells from being processed.
 *
 * @throws IOException if the listing page itself cannot be downloaded
 */
public void fetch() throws IOException {
    Document doc = Jsoup.connect(this.baseUrl).get();
    Elements detailBox = doc.select("td[style=\"vertical-align:top;\"]");
    // Loop-invariant: compile once instead of once per cell.
    Pattern countArguments = Pattern.compile("(?<=javascript\\:count\\().*(?=\\))");

    for (Element es : detailBox) {
        try {
            // Cells without a contact link never produce a Park, so check this first.
            Elements links = es.select("span > a");
            if (links.isEmpty()) {
                continue;
            }

            String secretContent = StringEscapeUtils
                    .unescapeHtml(links.get(0).attr("href").replaceAll("\"", ""));
            Matcher matcher = countArguments.matcher(secretContent);
            if (!matcher.find()) {
                continue;
            }
            String[] secretInfoList = matcher.group().split(",");
            if (secretInfoList.length < 2) {
                // No lookup key available; nothing can be fetched for this cell.
                continue;
            }
            String keyId = secretInfoList[1];

            Park detail = new Park();
            detail.name = es.childNode(0) instanceof Element ? ((Element) es.childNode(0)).text() : "";

            // Walk the child nodes after the name; stop at the first node whose
            // markup is repeated by its successor, and leave out bare <br> nodes.
            StringBuilder address = new StringBuilder();
            for (int i = 1; i < es.childNodes().size() - 1; i++) {
                String content = es.childNodes().get(i).toString();
                if (content.equals(es.childNodes().get(i + 1).toString())) {
                    break;
                }
                if (!content.equals("<br>")) {
                    address.append(StringEscapeUtils.unescapeHtml(content)).append(' ');
                }
            }
            detail.address = address.toString();

            detail.mobileNumber = fetchContactStat(keyId, 0);
            detail.phoneNumber = fetchContactStat(keyId, 1);
            detail.faxNumber = fetchContactStat(keyId, 2);

            // String.split drops trailing empty fields, so a blank e-mail argument
            // leaves only two entries; treat that as "no e-mail" rather than failing.
            detail.email = secretInfoList.length > 2 && StringUtils.isNotBlank(secretInfoList[2])
                    ? secretInfoList[2]
                    : "";

            this.info.parks.add(detail);
        } catch (Exception ex) {
            // Describe the cell defensively: a cell without children must not make the
            // error handler itself throw and abort the whole fetch.
            String cellLabel = es.childNodes().isEmpty() ? "" : es.childNode(0).toString();
            System.out.printf("Error during fetch %s park with url %s %n", cellLabel, this.baseUrl);
            ex.printStackTrace();
        }
    }
}

/**
 * Fetches one contact value from the stats lookup endpoint.
 *
 * @param keyId  value sent as the {@code keyID} query parameter
 * @param statId value sent as the {@code StatID} query parameter
 *               (the caller uses 0 for mobile, 1 for phone, 2 for fax)
 * @return the text of the response, or an empty string when it is blank
 * @throws IOException if the request fails
 */
private String fetchContactStat(String keyId, int statId) throws IOException {
    String content = Jsoup
            .connect(String.format("http://www.goseeaustralia.com.au/statslookup.asp?keyID=%s&StatID=%d",
                    keyId, statId))
            .get().text();
    return StringUtils.isNotBlank(content) ? content : "";
}
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
static List<LentItem> parse_medialist(Document doc) { List<LentItem> media = new ArrayList<>(); Elements copytrs = doc.select(".data tr"); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); int trs = copytrs.size(); if (trs == 1) { return null; }//w w w . j a v a 2 s . c o m assert (trs > 0); for (int i = 1; i < trs; i++) { Element tr = copytrs.get(i); LentItem item = new LentItem(); if (tr.text().contains("keine Daten")) { return null; } item.setTitle(tr.select(".account-display-title").select("b, strong").text().trim()); try { item.setAuthor(tr.select(".account-display-title").html().split("<br[ /]*>")[1].trim()); String[] col3split = tr.select(".account-display-state").html().split("<br[ /]*>"); String deadline = Jsoup.parse(col3split[0].trim()).text().trim(); if (deadline.contains(":")) { // BSB Munich: <span class="hidden-sm hidden-md hidden-lg">Flligkeitsdatum : </span>26.02.2016<br> deadline = deadline.split(":")[1].trim(); } if (deadline.contains("-")) { // Chemnitz: 22.07.2015 - 20.10.2015<br> deadline = deadline.split("-")[1].trim(); } try { item.setDeadline(fmt.parseLocalDate(deadline).toString()); } catch (IllegalArgumentException e1) { e1.printStackTrace(); } if (col3split.length > 1) item.setHomeBranch(col3split[1].trim()); if (tr.select("a").size() > 0) { for (Element link : tr.select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq.get("methodToCall").equals("renewal")) { item.setProlongData(href.split("\\?")[1]); item.setRenewable(true); break; } } } } catch (Exception ex) { ex.printStackTrace(); } media.add(item); } return media; }
From source file:org.javiermoreno.torrentscratcher.Runner.java
public Movie enrichMovieWithImdbSearch(Movie movie) { try {//from ww w. j av a 2 s.com String url = "http://www.imdb.com/find?q={title}&s=all"; String title = movie.getOriginalTitle() != null ? movie.getOriginalTitle() : movie.getTitle(); url = url.replace("{title}", java.net.URLEncoder.encode(title, "UTF-8")); Document doc = Jsoup.connect(url).get(); Elements results = doc.select(".result_text a"); if (results.size() == 0) { log.warn("IMDB search 404: " + movie.getTitle()); return movie; } String link = results.first().attr("href"); String imdbId = link.substring("/title/".length(), link.indexOf("?") - 1); movie.setImdbId(imdbId); url = "http://www.imdb.com" + link; doc = Jsoup.connect(url).get(); movie.setGenre(doc.select("[itemprop=genre]").eq(0).text()); String rating = doc.select("[itemprop=aggregateRating] [itemprop=ratingValue]").text(); if (rating.isEmpty() == false) { movie.setRating(Double.valueOf(rating.replace(',', '.'))); } } catch (IOException ex) { log.warn(ex.getMessage()); } return movie; }
From source file:org.cellcore.code.engine.page.extractor.mb.MBPageDataExtractor.java
/**
 * Returns the lowest price found in the first {@code .stock} table.
 *
 * <p>Row 0 of the table is skipped. For each other row the price is read from the
 * {@code text} attribute of the first child node of the fourth cell and normalised
 * with {@code cleanPriceString}.
 *
 * @param doc the parsed product page
 * @return {@code -1} when the page contains the "not available in stock" notice;
 *         otherwise the smallest parsed price, or {@code Float.MAX_VALUE} when the
 *         table has no rows after the first
 */
protected float getPrice(Document doc) {
    boolean unavailable =
            !doc.getElementsContainingText("Cette carte n'est pas disponible en stock").isEmpty();
    if (unavailable) {
        return -1;
    }

    Elements rows = doc.select(".stock").get(0).getElementsByTag("tr");
    float lowest = Float.MAX_VALUE;
    int rowIndex = 1;
    while (rowIndex < rows.size()) {
        Elements cells = rows.get(rowIndex).getElementsByTag("td");
        String rawPrice = cells.get(3).childNodes().get(0).attr("text");
        float candidate = Float.parseFloat(cleanPriceString(rawPrice));
        if (lowest > candidate) {
            lowest = candidate;
        }
        rowIndex++;
    }
    return lowest;
}
From source file:org.wallride.web.support.Posts.java
/**
 * Rewrites media image URLs in an HTML fragment to request a double-width variant.
 *
 * <p>For every {@code img} whose {@code src} starts with the configured media URL
 * prefix and whose {@code style} attribute contains {@code width: <n>px;}, the
 * suffix {@code ?w=<2n>} is appended to {@code src}. Other images are left as is.
 *
 * @param html the HTML to process
 * @return the inner HTML of the parsed document body after rewriting
 */
protected String parse(String html) {
    Document document = Jsoup.parse(html);
    // Loop-invariant: compile once instead of once per image.
    Pattern widthPattern = Pattern.compile("width: ([0-9]+)px;");

    for (Element element : document.select("img")) {
        String src = element.attr("src");
        if (!src.startsWith(wallRideProperties.getMediaUrlPrefix())) {
            continue;
        }
        Matcher matcher = widthPattern.matcher(element.attr("style"));
        if (matcher.find()) {
            String replaced = src + "?w=" + Integer.parseInt(matcher.group(1)) * 2;
            element.attr("src", replaced);
        }
    }
    return document.body().html();
}
From source file:org.cellcore.code.engine.page.extractor.mb.MBPageDataExtractor.java
/**
 * Returns the stock count listed on the cheapest row of the first {@code .stock}
 * table.
 *
 * <p>Row 0 of the table is skipped. For each other row the price comes from the
 * {@code text} attribute of the first child node of the fourth cell, and the stock
 * from the {@code text} attribute of the second child node of the fifth cell, with
 * parentheses removed before parsing.
 *
 * @param doc the parsed product page
 * @return {@code 0} when the page contains the "not available in stock" notice or
 *         no row yields a price below {@code Float.MAX_VALUE}; otherwise the stock
 *         of the first row carrying the lowest price
 */
@Override
protected int getStock(Document doc) {
    boolean unavailable =
            !doc.getElementsContainingText("Cette carte n'est pas disponible en stock").isEmpty();
    if (unavailable) {
        return 0;
    }

    Elements rows = doc.select(".stock").get(0).getElementsByTag("tr");
    float lowest = Float.MAX_VALUE;
    int stockAtLowest = 0;
    int rowIndex = 1;
    while (rowIndex < rows.size()) {
        Elements cells = rows.get(rowIndex).getElementsByTag("td");
        String rawPrice = cells.get(3).childNodes().get(0).attr("text");
        String rawStock = cells.get(4).childNodes().get(1).attr("text");
        float candidate = Float.parseFloat(cleanPriceString(rawPrice));
        if (lowest > candidate) {
            lowest = candidate;
            String digits = rawStock.replaceAll("\\(", "").replaceAll("\\)", "");
            stockAtLowest = Integer.parseInt(digits);
        }
        rowIndex++;
    }
    return stockAtLowest;
}
From source file:com.josue.lottery.eap.service.core.LotoImporter.java
private void parseHtml(File file) { // String html = "<html><head><title>First parse</title></head>" // + "<body><p>Parsed HTML into a doc.</p>" // +/*from w w w .ja v a 2s. c om*/ // " <table><tr><td>satu</td><td>satu-1</td></tr><tr><td>dua</td><td>dua-1</td></tr><tr><td>tiga</td><td>tiga-1</td></tr></table> " // + "</body></html>"; StringBuilder sb = new StringBuilder(); BufferedReader br = null; try { br = new BufferedReader(new FileReader(file)); } catch (FileNotFoundException ex) { java.util.logging.Logger.getLogger(LotoImporter.class.getName()).log(Level.SEVERE, null, ex); } String line; try { while ((line = br.readLine()) != null) { sb.append(line); } } catch (IOException ex) { java.util.logging.Logger.getLogger(LotoImporter.class.getName()).log(Level.SEVERE, null, ex); } Document doc = Jsoup.parse(sb.toString()); Element table = doc.select("table").first(); Iterator<Element> iterator = table.select("td").iterator(); while (iterator.hasNext()) { logger.info("text : " + iterator.next().text()); } String title = doc.title(); System.out.println("Document title : " + title); }
From source file:hu.petabyte.redflags.engine.gear.parser.MetadataParser.java
public Notice parseDataTab(Notice notice, Document dataTab) { Data data = notice.getData(); notice.setCancelled(!dataTab.select("div#cancelDoc").isEmpty()); for (Element tr : dataTab.select("table.data tr")) { String field = tr.select("th").first().text(); String value = JsoupUtils.text(tr.select("td").last()); LOG.trace("{} data: {} = {}", notice.getId(), field, value); setDataField(data, field, value); }// w w w. jav a 2 s . c o m return notice; }
From source file:me.vertretungsplan.parser.UntisInfoHeadlessParser.java
@Override public SubstitutionSchedule getSubstitutionSchedule() throws IOException, JSONException, CredentialInvalidException { new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore); SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData); Document doc = Jsoup.parse(httpGet(url, data.optString(PARAM_ENCODING, null))); doc.setBaseUri(url);//from w w w. ja va 2s.c o m Elements dayElems = doc.select("#vertretung > p > b, #vertretung > b"); Elements frames = doc.select("frame[src*=w00]"); if (dayElems.size() == 0 && frames.size() > 0) { // doc is embedded in frame doc = Jsoup.parse(httpGet(frames.get(0).absUrl("src"), data.optString(PARAM_ENCODING, null))); dayElems = doc.select("#vertretung > p > b, #vertretung > b"); } for (Element dayElem : dayElems) { SubstitutionScheduleDay day = new SubstitutionScheduleDay(); day.setLastChangeString(""); String date = dayElem.text(); day.setDateString(date); day.setDate(ParserUtils.parseDate(date)); Element next; if (dayElem.parent().tagName().equals("p")) { next = dayElem.parent().nextElementSibling().nextElementSibling(); } else { next = dayElem.parent().select("p").first().nextElementSibling(); } parseDay(day, next, v, null); } v.setClasses(getAllClasses()); v.setTeachers(getAllTeachers()); return v; }