List of usage examples for org.jsoup.nodes.Element.className()
public String className()
From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java
@Override public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); Document navbarDoc = Jsoup.parse(getNavbarDoc().replace(" ", "")); Element select = navbarDoc.select("select[name=week]").first(); Vertretungsplan v = new Vertretungsplan(); List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>(); String info = navbarDoc.select(".description").text(); String stand;/*from w w w . ja v a 2s .c o m*/ try { stand = info.substring(info.indexOf("Stand:")); } catch (Exception e) { stand = ""; } for (Element option : select.children()) { String week = option.attr("value"); String letter = data.optString("letter", "w"); if (data.optBoolean("single_classes", false)) { int classNumber = 1; for (String klasse : getAllClasses()) { String paddedNumber = String.format("%05d", classNumber); String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } writeTagByDatum(tage, tag); } classNumber++; } } else { 
String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } tage.add(tag); } } v.setTage(tage); } return v; }
From source file:me.vertretungsplan.parser.IndiwareParser.java
SubstitutionScheduleDay parseIndiwareDay(Element doc, boolean html) throws IOException { SubstitutionScheduleDay day = new SubstitutionScheduleDay(); DataSource ds;//from w ww. j a va 2 s . c o m if (html) { ds = new HTMLDataSource(doc); } else { ds = new XMLDataSource(doc); } Matcher matcher = datePattern.matcher(ds.titel().text()); if (!matcher.find()) throw new IOException("malformed date: " + ds.titel().text()); String date = matcher.group(); day.setDate( DateTimeFormat.forPattern("EEEE, dd. MMMM yyyy").withLocale(Locale.GERMAN).parseLocalDate(date)); String lastChange = ds.datum().text(); day.setLastChange(DateTimeFormat.forPattern("dd.MM.yyyy, HH:mm").withLocale(Locale.GERMAN) .parseLocalDateTime(lastChange)); if (ds.kopfinfos().size() > 0) { for (Element kopfinfo : ds.kopfinfos()) { String title = html ? kopfinfo.select("th").text() : kopfinfoTitle(kopfinfo.tagName()) + ":"; StringBuilder message = new StringBuilder(); if (title != null && !title.isEmpty()) { message.append("<b>").append(title).append("</b>").append(" "); } message.append(html ? kopfinfo.select("td").text() : kopfinfo.text()); day.addMessage(message.toString()); } } if (ds.fuss() != null) { StringBuilder message = new StringBuilder(); boolean first = true; for (Element fusszeile : ds.fusszeilen()) { if (first) { first = false; } else { message.append("\n"); } message.append(fusszeile.text()); } day.addMessage(message.toString()); } List<String> columnTypes = null; if (html) { columnTypes = new ArrayList<>(); for (Element th : ((HTMLDataSource) ds).headers()) { columnTypes.add(th.className().replace("thplan", "").replace("thlplan", "")); } } for (Element aktion : ds.aktionen()) { Substitution substitution = new Substitution(); String type = "Vertretung"; String course = null; int i = 0; for (Element info : aktion.children()) { String value = info.text().replace("\u00a0", ""); if (value.equals("---")) { i++; continue; } final String columnType = html ? 
columnTypes.get(i) : info.tagName(); switch (columnType) { case "klasse": Set<String> classes = new HashSet<>(); for (String klasse : value.split(",")) { Matcher courseMatcher = coursePattern.matcher(klasse); if (courseMatcher.matches()) { classes.add(courseMatcher.group(1)); course = courseMatcher.group(2); } else { classes.add(klasse); } } substitution.setClasses(classes); break; case "stunde": substitution.setLesson(value); break; case "fach": String subject = subjectAndCourse(course, value); if (columnTypes != null && columnTypes.contains("vfach")) { substitution.setPreviousSubject(subject); } else { substitution.setSubject(subject); } break; case "vfach": substitution.setSubject(subjectAndCourse(course, value)); case "lehrer": Matcher bracesMatcher = bracesPattern.matcher(value); if (bracesMatcher.matches()) value = bracesMatcher.group(1); substitution.setTeacher(value); break; case "raum": if (columnTypes != null && columnTypes.contains("vraum")) { substitution.setPreviousRoom(value); } else { substitution.setRoom(value); } break; case "vraum": substitution.setRoom(value); case "info": Matcher substitutionMatcher = substitutionPattern.matcher(value); Matcher cancelMatcher = cancelPattern.matcher(value); Matcher delayMatcher = delayPattern.matcher(value); Matcher selfMatcher = selfPattern.matcher(value); if (substitutionMatcher.matches()) { substitution.setPreviousSubject(substitutionMatcher.group(1)); substitution.setPreviousTeacher(substitutionMatcher.group(2)); if (!substitutionMatcher.group(3).isEmpty()) { substitution.setDesc(substitutionMatcher.group(3)); } } else if (cancelMatcher.matches()) { type = "Entfall"; substitution.setPreviousSubject(cancelMatcher.group(1)); substitution.setPreviousTeacher(cancelMatcher.group(2)); } else if (delayMatcher.matches()) { type = "Verlegung"; substitution.setPreviousSubject(delayMatcher.group(1)); substitution.setPreviousTeacher(delayMatcher.group(2)); substitution.setDesc(delayMatcher.group(3)); } else if 
(selfMatcher.matches()) { type = "selbst."; if (!selfMatcher.group(1).isEmpty()) substitution.setDesc(selfMatcher.group(1)); } else if (value.equals("fllt aus") || value.equals("Klausur") || value.equals("Aufg.")) { type = value; } else { substitution.setDesc(value); } break; } i++; } substitution.setType(type); substitution.setColor(colorProvider.getColor(substitution.getType())); if (course != null && substitution.getSubject() == null) { substitution.setSubject(course); } day.addSubstitution(substitution); } return day; }
From source file:net.vexelon.mobileops.GLBClient.java
public String getCurrentBalance() throws HttpClientException { StringBuilder builder = new StringBuilder(100); HttpResponse resp;//from w ww . j ava 2s .co m long bytesCount = 0; try { String url = HTTP_MYTELENOR + GLBRequestType.GET_BALANCE.getPath(); url += '?'; url += new Date().getTime(); HttpGet httpGet = new HttpGet(url); // httpGet.setHeader("X-Requested-With", "XMLHttpRequest"); resp = httpClient.execute(httpGet, httpContext); } catch (Exception e) { throw new HttpClientException("Client protocol error!" + e.getMessage(), e); } StatusLine status = resp.getStatusLine(); if (status.getStatusCode() != HttpStatus.SC_OK) throw new HttpClientException(status.getReasonPhrase(), status.getStatusCode()); try { HttpEntity entity = resp.getEntity(); // bytes downloaded bytesCount = entity.getContentLength() > 0 ? entity.getContentLength() : 0; Document doc = Jsoup.parse(entity.getContent(), RESPONSE_ENCODING, ""); Elements elements; // period bill elements = doc.select("#outstanding-amount"); if (elements.size() > 0) { Elements divs = elements.get(0).select("div"); for (Element el : divs) { String elClass = el.className(); if (elClass.contains("custme-select") || elClass.equalsIgnoreCase("history")) { builder.insert(0, el.html()); } } } // current bill elements = doc.select("#bars-wrapper .p-price"); if (elements.size() > 0) { Element el = elements.get(0); builder.insert(0, el.html()); } return builder.toString(); } catch (ClientProtocolException e) { throw new HttpClientException("Client protocol error!" + e.getMessage(), e); } catch (IOException e) { throw new HttpClientException("Client error!" + e.getMessage(), e); } finally { addDownloadedBytesCount(bytesCount); } }
From source file:me.vertretungsplan.parser.UntisCommonParser.java
void parseDay(SubstitutionScheduleDay day, Element next, SubstitutionSchedule v, String klasse) throws JSONException, CredentialInvalidException { if (next.className().equals("subst") || next.select(".list").size() > 0 || next.text().contains("Vertretungen sind nicht freigegeben") || next.text().contains("Keine Vertretungen")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) { return; }// ww w . ja v a 2 s .c o m parseSubstitutionScheduleTable(next, scheduleData.getData(), day, klasse); } else { //Nachrichten parseMessages(next, day); next = next.nextElementSibling().nextElementSibling(); parseSubstitutionScheduleTable(next, scheduleData.getData(), day, klasse); } v.addDay(day); }
From source file:uk.co.certait.htmlexporter.css.StyleMap.java
private List<Style> getStylesForClass(Element element) { List<Style> classStyles = new ArrayList<Style>(); if (StringUtils.isNotEmpty(element.className())) { String[] classNames = element.className().split(" "); for (String className : classNames) { String qualifiedClassName = CLASS_PREFIX + className.trim(); if (styles.containsKey(qualifiedClassName)) { classStyles.add(styles.get(qualifiedClassName)); }//from w w w . j a va2s .c o m } } return classStyles; }
From source file:us.colloquy.sandbox.TestExtractor.java
@Test public void useJsoup() { String homeDir = System.getProperty("user.home"); System.out.println(homeDir);/*ww w . j av a 2 s .c o m*/ //JSOUP API allows to extract all elements of letters in files // File input = new File("samples/OEBPS/Text/0001_1006_2001.xhtml"); File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html"); try { Document doc = Jsoup.parse(input, "UTF-8"); List<Letter> letters = new ArrayList<>(); //our model contains only a subset of fields String previousYear = ""; for (Element element : doc.getElementsByClass("section")) { Letter letter = new Letter(); StringBuilder content = new StringBuilder(); for (Element child : element.children()) { for (Attribute att : child.attributes()) { System.out.println(att.getKey() + " " + att.getValue()); } if ("center".equalsIgnoreCase(child.className())) { String toWhom = child.getElementsByTag("strong").text(); if (StringUtils.isEmpty(toWhom)) { toWhom = child.text(); // System.out.println(toWhom); } String[] toWhomArray = toWhom.split("(\\s\\s)|(,)"); for (String to : toWhomArray) { RussianDate.parseToWhom(letter, to); //here we need to recognize a russian name and store that but for now we store the content } //check if there is anything else here and find date and place - it will be replaced if exists below String entireText = child.text(); String tail = entireText.replace(toWhom, ""); if (StringUtils.isNotEmpty(tail)) { RussianDate.parseDateAndPlace(letter, tail, previousYear); //a parser that figures out date and place if they are present } // System.out.println("two whom\t " + child.getElementsByTag("strong").text() ); } else if ("Data".equalsIgnoreCase(child.className())) { if (child.getElementsByTag("em") != null && StringUtils.isNotEmpty(child.getElementsByTag("em").text())) { RussianDate.parseDateAndPlace(letter, child.getElementsByTag("em").text(), previousYear); //most often date and place are enclosed in em tag if (letter.getDate() != null) { LocalDate localDate = 
letter.getDate().toInstant().atZone(ZoneId.systemDefault()) .toLocalDate(); int year = localDate.getYear(); previousYear = year + ""; } } // System.out.println("when and where\t " + child.getElementsByTag("em").text()); } else if ("petit".equalsIgnoreCase(child.className()) || "Textpetit_otstup".equalsIgnoreCase(child.className())) { letter.getNotes().add(child.text()); } else { //System.out.println(child.text() ); Elements elements = child.getElementsByTag("sup"); for (Element e : elements) { String value = e.text(); e.replaceWith(new TextNode("[" + value + "]", null)); } for (Element el : child.getAllElements()) { // System.out.println(el.tagName()); if ("sup".equalsIgnoreCase(el.tagName())) { content.append(" [" + el.text() + "] "); } else { content.append(el.text()); } } content.append("\n"); } // System.out.println(child.tag() + "\n" ); // System.out.println(child.outerHtml() + "\n" + child.text()); } letter.setContent(content.toString()); letters.add(letter); } ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter(); for (Letter letter : letters) { // if (letter.getDate() == null) // { // if (StringUtils.isNotEmpty(person.getLastName())) // { String json = ow.writeValueAsString(letter); System.out.println(json); // } //} } } catch (IOException e) { e.printStackTrace(); } }