List of usage examples for org.jsoup.nodes Element select
public Elements select(String cssQuery)
From source file:de.ncoder.studipsync.studip.jsoup.JsoupStudipAdapter.java
/**
 * Collects the seminars listed on the seminar overview page.
 *
 * <p>Makes sure a session exists, navigates to {@code PAGE_SEMINARS} and walks the body rows of
 * the first table directly inside {@code #content}. A row yields a seminar only when it has more
 * than four direct cells, the selected link in its fourth cell exists, and that link contains at
 * least two {@code font} elements. The link's absolute {@code href} and the trimmed texts of the
 * first two {@code font} elements are handed to {@code Seminar.getSeminar}.
 *
 * @return the seminars found, in document order; empty if no row qualifies
 * @throws StudipException declared by the overridden method
 */
@Override
public List<Seminar> parseSeminars() throws StudipException {
    ensureLoggedIn();
    navigate(PAGE_SEMINARS);

    List<Seminar> seminars = new ArrayList<>();
    for (org.jsoup.nodes.Element row : document.select("#content>table:first-of-type>tbody>tr")) {
        // Rows with four or fewer cells cannot hold a seminar entry.
        if (row.select(">td").size() <= 4) {
            continue;
        }

        Elements link = row.select(">td:nth-of-type(4)>a:first-of-type");
        Elements fonts = link.select("font");
        if (link.size() < 1 || fonts.size() < 2) {
            continue;
        }

        String url = link.get(0).absUrl("href");
        String firstText = fonts.get(0).text().trim();
        String secondText = fonts.get(1).text().trim();
        seminars.add(Seminar.getSeminar(url, firstText, secondText));
    }

    log.debug("Parsed " + seminars.size() + " seminars.");
    log.trace(seminars.toString());
    return seminars;
}
From source file:me.vertretungsplan.parser.SVPlanParser.java
/**
 * Parses a single day from an SVPlan page fragment and appends it to the schedule.
 *
 * <p>The fragment is accepted when it contains one of the known date/title elements or when the
 * document title starts with the expected prefix. Table rows are turned into substitutions based
 * on the CSS class of each cell; cells without data in the lesson or class column inherit the
 * last non-empty value seen in that column. Afterwards the teacher/absence summaries and the
 * announcement paragraphs are added to the day as messages.
 *
 * @param v   schedule that receives the parsed day
 * @param svp element containing the plan for one day
 * @param doc the document {@code svp} belongs to (used for its title and header detection)
 * @throws IOException if the fragment is not recognised as an SVPlan day
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    // NOTE(review): the title prefix literal looks like it lost a non-ASCII character
    // somewhere upstream - confirm against the real pages before touching it.
    boolean recognised = !svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").isEmpty()
            || doc.title().startsWith("Vertretungsplan fr ");
    if (!recognised) {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }

    setDate(svp, doc, day);

    if (!svp.select(".svp-tabelle, table:has(.Klasse)").isEmpty()) {
        String previousLesson = "";
        String previousClass = "";

        for (Element row : svp.select(".svp-tabelle tr, table:has(.Klasse) tr")) {
            boolean markedHeader = !doc.select(".svp-header").isEmpty() && row.hasClass("svp-header");
            if (markedHeader || !row.select("th").isEmpty() || row.text().trim().equals("")) {
                continue;
            }

            Substitution substitution = new Substitution();
            for (Element cell : row.select("td")) {
                String type = cell.className();
                boolean lessonCell = type.startsWith("svp-stunde") || type.startsWith("Stunde");
                boolean classCell = type.startsWith("svp-klasse") || type.startsWith("Klasse");

                if (!hasData(cell.text())) {
                    // Empty lesson/class cells repeat the value of the row above.
                    if (lessonCell && hasData(previousLesson)) {
                        substitution.setLesson(previousLesson);
                    } else if (classCell && hasData(previousClass)) {
                        String[] classes = previousClass.split(data.optString(PARAM_CLASS_SEPARATOR, ", "));
                        substitution.getClasses().addAll(Arrays.asList(classes));
                    }
                    continue;
                }

                String text = cell.text();
                if (lessonCell) {
                    substitution.setLesson(text);
                    previousLesson = text;
                } else if (classCell) {
                    String[] classes = text.split(data.optString(PARAM_CLASS_SEPARATOR, ", "));
                    substitution.getClasses().addAll(Arrays.asList(classes));
                    previousClass = text;
                } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        substitution.setPreviousTeacher(text);
                    }
                } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        // Strip a trailing " +" marker from the teacher name.
                        substitution.setTeacher(text.replaceAll(" \\+$", ""));
                    }
                } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                    substitution.setSubject(text);
                } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                    substitution.setDesc(text);
                    String recognizedType = recognizeType(text);
                    substitution.setType(recognizedType);
                    substitution.setColor(colorProvider.getColor(recognizedType));
                } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                    substitution.setRoom(text);
                }
            }

            // Rows without a remark cell fall back to the generic type.
            if (substitution.getType() == null) {
                substitution.setType("Vertretung");
                substitution.setColor(colorProvider.getColor("Vertretung"));
            }
            day.addSubstitution(substitution);
        }
    }

    Elements plannedTeachers = svp.select(".LehrerVerplant");
    if (!plannedTeachers.isEmpty()) {
        day.addMessage("<b>Verplante Lehrer:</b> " + plannedTeachers.text());
    }

    Elements absences = svp.select(".Abwesenheiten");
    if (!absences.isEmpty()) {
        day.addMessage("<b>Abwesenheiten:</b> " + absences.text());
    }

    Elements announcementHeadings = svp.select("h2:contains(Mitteilungen)");
    if (!announcementHeadings.isEmpty()) {
        // Announcements are the run of <p> elements directly following the heading.
        Element paragraph = announcementHeadings.first().nextElementSibling();
        while (paragraph != null && paragraph.tagName().equals("p")) {
            String decoded = TextNode.createFromEncoded(paragraph.html(), null).getWholeText();
            for (String nachricht : decoded.split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
            paragraph = paragraph.nextElementSibling();
        }
    } else {
        // Without a heading, fall back to elements carrying the announcement class.
        for (Element block : svp.select(".Mitteilungen")) {
            String decoded = TextNode.createFromEncoded(block.html(), null).getWholeText();
            for (String nachricht : decoded.split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
        }
    }

    v.addDay(day);
}
From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java
/** * ???//www. ja v a2s . c o m * * @param eleTrs * @param rowNo * @return */ private String parseDetailTr(Element eleTr) throws Exception { Element eleTd = eleTr.select("td").get(1); // td if (eleTd.children().size() > 0) { return eleTd.child(0).html(); } else { return eleTd.html().trim(); } }
From source file:org.confab.PhpBB3Parser.java
public List<Forum> parseForums(Document root, BulletinBoard parent) { Utilities.debug("parseForums"); List<Forum> ret = new ArrayList<Forum>(); // get table//from w w w . j av a 2 s . c o m Elements forum_tables = root.select("ul[class=topiclist forums]"); assert !forum_tables.isEmpty() : root.html(); for (Element forum_table : forum_tables) { Elements els_li = forum_table.select("li.row"); assert !els_li.isEmpty(); for (Element el_li : els_li) { Forum new_forum = new Forum(parent); // Get the forum url Elements els_a = el_li.select("a.forumtitle"); Element el_a = els_a.first(); assert el_a != null; new_forum.url = el_a.attr("href"); assert new_forum.url != null; Utilities.debug("new_forum.url : " + new_forum.url); // Get the title text new_forum.title = el_a.text(); assert new_forum.title != null; Utilities.debug("new_forum.title : " + new_forum.title); // Check for any subforums in remaining a elements els_a.remove(els_a.first()); for (Element _el_a : els_a) { Forum sub_forum = new Forum(parent); sub_forum.url = el_a.attr("href"); assert sub_forum.url != null; sub_forum.title = el_a.text(); assert sub_forum.title != null; new_forum.subForums.add(sub_forum); Utilities.debug("added subForum: " + sub_forum.title); } // Get the description/message of this topic String el_description = el_a.parent().text(); if (el_description != null) { new_forum.description = el_description; } else { new_forum.description = ""; } Utilities.debug("new_forum.description : " + new_forum.description); Utilities.debug("new_forum.parent.url : " + new_forum.parent.url); ret.add(new_forum); Utilities.debug("-----"); } } Utilities.debug("end parseForums"); return ret; }
From source file:mergedoc.core.APIDocument.java
/** * ? Javadoc ????/*from w w w . j a v a 2 s . co m*/ * author, version ? Javadoc ???????????<br> * @param className ?? * @param docHtml API */ private void parseClassComment(String className, Document doc) { Elements elements = doc.select("body > div.contentContainer > div.description > ul > li"); for (Element element : elements) { String sigStr = element.select("pre").first().html(); Signature sig = createSignature(className, sigStr); Comment comment = new Comment(sig); // deprecated String depre = ""; Elements divs = element.select("div"); if (divs.size() == 2) { depre = divs.get(0).html(); } parseDeprecatedTag(className, depre, comment); // if (divs.size() > 0) { String body = divs.last().html(); body = formatLinkTag(className, body); comment.setDocumentBody(body); } // parseCommonTag(className, element, comment); log.debug(sig); contextTable.put(sig, comment); } }
From source file:de.geeksfactory.opacclient.apis.WebOpacAt.java
/**
 * Loads the detail page for one catalogue entry and converts it into a {@code DetailledItem}.
 *
 * <p>The item gets a single copy whose status, return date and reservation count are read from
 * the availability table. Each row of the title table becomes a detail; a row whose first cell
 * is blank is treated as a continuation and appended (newline-separated) to the previous detail.
 *
 * @param id         identifier appended to the detail URL
 * @param homebranch not used by this implementation
 * @return the populated item
 * @throws IOException        declared by the overridden method
 * @throws OpacErrorException declared by the overridden method
 */
@Override
public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException {
    if (!initialised) {
        start();
    }

    String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding());
    Document doc = Jsoup.parse(html);

    Element detailData = doc.select(".detailData").first();
    Element detailTable = detailData.select("table.titel").first();
    Element availabilityTable = doc.select(".bibliothek table").first();

    DetailledItem result = new DetailledItem();
    Copy copy = new Copy();
    result.addCopy(copy);

    result.setId(id);
    result.setCover(getCover(doc));
    result.setTitle(detailData.select("h3").first().text());
    result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media")));

    // NOTE(review): the first alternative of the status label looks like it lost a non-ASCII
    // character upstream - confirm against the live page before changing it.
    copy.setStatus(getCellContent(availabilityTable, "Verfgbar|Available"));
    copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent")));
    copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations"));

    for (Element row : detailTable.select("tr")) {
        String label = row.child(0).text();
        String value = row.child(1).text();

        boolean blankLabel = label == null || label.trim().isEmpty();
        if (!blankLabel) {
            result.addDetail(new Detail(label, value));
            continue;
        }
        if (result.getDetails().isEmpty()) {
            // A continuation row with nothing before it has nowhere to go.
            continue;
        }

        // Continuation row: extend the most recently added detail.
        Detail previous = result.getDetails().get(result.getDetails().size() - 1);
        previous.setHtml(true);
        previous.setContent(previous.getContent() + "\n" + value);
    }

    return result;
}
From source file:mergedoc.core.APIDocument.java
/** * Javadoc ? ??????/* w w w . j a v a 2s.com*/ * @param className ?? * @param context * @param comment */ private void parseCommonTag(String className, Element element, Comment comment) { Elements dts = element.select("dl dt"); for (Element dt : dts) { String dtText = dt.text(); if (dtText.contains("")) { Elements aTags = dt.nextElementSibling().select("a:has(code)"); for (Element a : aTags) { String url = a.attr("href"); String ref; if (a.childNodeSize() != 1) { ref = aTags.outerHtml(); } else { ref = formatClassName(className, url); ref = FastStringUtils.replace(ref, "%28", "("); ref = FastStringUtils.replace(ref, "%29", ")"); Pattern methodRefPat = PatternCache.getPattern("-(.*)-$"); Matcher methodRefMat = methodRefPat.matcher(ref); if (methodRefMat.find()) { ref = FastStringUtils.replaceAll(ref, "-(.*)-$", "($1)"); // for Java8 ref = FastStringUtils.replace(ref, "-", ","); // for Java8 ref = FastStringUtils.replace(ref, ":A", "[]"); // for Java8 } } comment.addSee(ref); } } else if (dtText.contains("???:")) { comment.addSince(dt.nextElementSibling().text()); } } }
From source file:net.parser.JobParser.java
/**
 * Reads the employer name from a job element.
 *
 * @param jobElement element describing one job
 * @param id         selects the extraction rule: {@code 0} reads the text of the first
 *                   {@code strong} element, any other value reads the combined text of the
 *                   elements with class {@code employer}
 * @return the extracted text (empty if nothing matches)
 */
private String getEmployerName(Element jobElement, int id) {
    if (id == 0) {
        return jobElement.select("strong").eq(0).text();
    }
    return jobElement.select(".employer").text();
}
From source file:com.gumtreescraper.scraper.GumtreeScraper.java
private boolean isOwner(Element adElement) { Elements forSaleByElements = adElement.select("span.rs-ad-attributes-forsaleby_s"); Elements forRentByElements = adElement.select("span.rs-ad-attributes-forrentby_s"); // sometime if ads is owner then it does not display if (forSaleByElements.isEmpty() && forRentByElements.isEmpty()) { return true; }/* ww w. java2 s. co m*/ if (!forSaleByElements.isEmpty() && ("agency".equalsIgnoreCase(forSaleByElements.first().text().trim()) || "agent".equalsIgnoreCase(forSaleByElements.first().text().trim()))) { return false; } if (!forRentByElements.isEmpty() && ("agency".equalsIgnoreCase(forRentByElements.first().text().trim()) || "agent".equalsIgnoreCase(forRentByElements.first().text().trim()))) { return false; } return true; }
From source file:de.ncoder.studipsync.studip.jsoup.JsoupStudipAdapter.java
@Override public List<Download> parseDownloads(String downloadsUrl, boolean structured) throws StudipException { try {/*from www.j a v a2 s. co m*/ ensureLoggedIn(); ensureCurrentSeminarSelected(); navigate(downloadsUrl); Map<Integer, Download> stack = new HashMap<>(); List<Download> downloads = new ArrayList<>(); Elements rows = document .select("#content>table>tbody>tr:nth-of-type(2)>td:nth-of-type(2)>table>tbody>tr>td>table"); for (org.jsoup.nodes.Element row : rows) { Elements content = row.select(">tbody>tr>td.printhead"); Elements insets = row.select(">tbody>tr>td.blank img"); if (content.size() >= 2) { Elements info = content.get(1).select("a"); Elements link = content.get(2).select("a[title]"); List<TextNode> time = content.get(2).textNodes(); if (info.size() > 0 && link.size() > 0 && time.size() > 0) { Download download = Download.getDownload(link.get(0).absUrl("href"), info.get(0).text().trim(), time.get(time.size() - 1).text().trim().replace("\u00a0", ""), ""); download.setSeminar(currentSeminar); int level = insets.size() - 3; if (level > 0) { download.setParent(stack.get(level - 1)); } else { download.setLevel(level); } stack.put(download.getLevel(), download); downloads.add(download); //TODO read size, description } } } log.debug("Parsed " + downloads.size() + " downloads."); log.trace(downloads.toString()); return downloads; } catch (StudipException ex) { ex.put("studip.seminar", currentSeminar); ex.put("parseDownloads.listUrl", downloadsUrl); ex.put("parseDownloads.structured", structured); throw ex; } }