List of usage examples for org.jsoup.nodes Element select
public Elements select(String cssQuery)
From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java
/** * ?//from w ww .j ava 2s. c o m * * @param document * @return * @throws Exception */ private Object analyzeLicenseDetail(HttpSeed seed) throws Exception { Document doc = parse(seed.getHtml()); Elements eleTable = doc.select(".listmain table"); // TR Elements eleTrs = eleTable.get(0).select("tr"); // ? Object entity = AnalyzeUtil.getInstant(PREFIX_ENTITY_PATH + syjTableBean.getTableClass()); // tr?trtd? int rowNo = 1; for (int i = 0; i < eleTrs.size(); i++) { Element eleTr = eleTrs.get(i); // ??trtd??nowrapnowrap?true if (i != eleTrs.size() - 1 && (!eleTr.select("td").get(0).hasAttr("nowrap") || !"true".equals(eleTr.select("td").get(0).attr("nowrap")))) { continue; } // td? String tdVal = parseDetailTr(eleTr); // TABLE7411?? if (syjTableBean.getTableClass().equals("TABLE74") && rowNo == 11) { continue; } // entity AnalyzeUtil.executeMethod(entity, PREFIX_ATTRIBUTE + rowNo++, new Object[] { tdVal }, new Class[] { String.class }); } // ?ID, ?createEmpCode String regex = ".+?&Id=(.+?)"; Object obj = AnalyzeUtil.regex(seed.getUrl(), regex); if (null == obj) { // ID AnalyzeUtil.executeMethod(entity, "setContentId", new Object[] { 0l }, new Class[] { Long.class }); } else { // ID AnalyzeUtil.executeMethod(entity, "setContentId", new Object[] { Long.valueOf(obj.toString()) }, new Class[] { Long.class }); } // ? AnalyzeUtil.executeMethod(entity, "setCreateTime", new Object[] { new Timestamp(new Date().getTime()) }, new Class[] { Timestamp.class }); return entity; }
From source file:me.vertretungsplan.parser.DaVinciParser.java
@NotNull void parsePage(Element doc, SubstitutionSchedule schedule) throws IOException { SubstitutionScheduleDay day = new SubstitutionScheduleDay(); Element titleElem;// w w w.ja v a 2 s . c o m if (doc.select("h1.list-table-caption").size() > 0) { titleElem = doc.select("h1.list-table-caption").first(); } else { // DaVinci 5 titleElem = doc.select("h2").first(); } String title = titleElem.text(); String klasse = null; // title can either be date or class Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}"); Matcher dateMatcher = datePattern.matcher(title); if (dateMatcher.find()) { day.setDateString(dateMatcher.group()); day.setDate(ParserUtils.parseDate(dateMatcher.group())); } else { klasse = title; String nextText = titleElem.nextElementSibling().text(); if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) { day.setDateString(nextText); day.setDate(ParserUtils.parseDate(nextText)); } else { // could not find date, must be multiple days day = null; } } for (Element p : doc.select(".row:has(h1.list-table-caption) p")) { for (TextNode node : p.textNodes()) { if (!node.text().trim().isEmpty() && day != null) day.addMessage(node.text().trim()); } } for (Element message : doc.select(".callout")) { for (TextNode node : message.textNodes()) { if (!node.text().trim().isEmpty()) day.addMessage(node.text().trim()); } } Element lastChangeElem = doc.select(".row.copyright div").first(); if (lastChangeElem == null) { // DaVinci 5 lastChangeElem = doc.select("h1").first(); } String lastChange = lastChangeElem.ownText(); Pattern pattern = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|"); Matcher matcher = pattern.matcher(lastChange); if (matcher.find()) { LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm") .parseLocalDateTime(matcher.group(1)); if (day != null) { day.setLastChange(lastChangeTime); } else { schedule.setLastChange(lastChangeTime); } } else { Pattern pattern2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})"); Matcher matcher2 = pattern2.matcher(lastChange); if (matcher2.find()) { LocalDateTime lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm") .parseLocalDateTime(matcher2.group(1)); if (day != null) { day.setLastChange(lastChangeTime); } else { schedule.setLastChange(lastChangeTime); } } } if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) { Element table = doc.select(".list-table, table").first(); parseDaVinciTable(table, schedule, klasse, day, colorProvider); } if (day != null) { schedule.addDay(day); } }
From source file:me.vertretungsplan.parser.UntisInfoParser.java
private void parseTimetable(SubstitutionSchedule v, String lastChange, Document doc, String klasse, String weekName) throws JSONException { v.setLastChange(ParserUtils.parseDateTime(lastChange)); LocalDate weekStart = DateTimeFormat.forPattern("d.M.yyyy").parseLocalDate(weekName); Element table = doc.select("table").first(); List<SubstitutionScheduleDay> days = new ArrayList<>(); for (int i = 0; i < table.select("tr").first().select("td:gt(0)").size(); i++) { LocalDate date = weekStart.plusDays(i); SubstitutionScheduleDay day = null; for (SubstitutionScheduleDay d : v.getDays()) { if (d.getDate().equals(date)) { day = d;// w ww . java 2 s . c om break; } } if (day == null) { day = new SubstitutionScheduleDay(); day.setDate(date); v.addDay(day); } days.add(day); } Elements rows = table.select("> tbody > tr:gt(0)"); Map<Integer, String> lessons = new HashMap<>(); int i = 0; int lessonCounter = 1; while (i < rows.size()) { Element cell = rows.get(i).select("td").first(); String lessonName = cell.text().trim(); if (lessonName.length() > 3) { lessonName = String.valueOf(lessonCounter); } lessons.put(i, lessonName); i += getRowspan(cell); lessonCounter += 1; } // counts the number of columns that will be missing from each row due to a cell with colspan Map<Integer, Integer> columnsToSkip = new HashMap<>(); for (int j = 0; j < rows.size(); j++) { columnsToSkip.put(j, 0); } for (int col = 1; col < days.size(); col++) { int row = 0; while (row < rows.size()) { Element cell = rows.get(row).select("> td").get(col - columnsToSkip.get(row)); String lesson = getTimetableLesson(cell, row, lessons); days.get(col - 1).addAllSubstitutions( parseTimetableCell(cell, lesson, klasse, data.getJSONArray("cellFormat"), colorProvider)); for (int skippedRow = row + 1; skippedRow < row + getRowspan(cell); skippedRow++) { columnsToSkip.put(skippedRow, columnsToSkip.get(skippedRow) + 1); } row += getRowspan(cell); } } }
From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java
private HttpPost getForm(final HttpContext httpContext, final HttpResponse response, final String user, final String password) throws IllegalStateException, IOException { final HttpEntity entity = response.getEntity(); final HttpHost host = (HttpHost) httpContext.getAttribute(ExecutionContext.HTTP_TARGET_HOST); final String requestUri = getLatestRedirectedUrl(httpContext); final String requestBaseUrl = requestUri.substring(0, requestUri.lastIndexOf('/')); final String userFormId = getHtmlElementId(host, FormId.USER); final String passFormId = getHtmlElementId(host, FormId.PASS); final String loginFormId = getHtmlElementId(host, FormId.LOGIN_FORM); final String loginButton = getSsoErrorHandler(host).getSsoLoginButtonName(); log.debug("Looking for HTML input form retrieved from " + requestUri); final List<NameValuePair> formNvps = new ArrayList<NameValuePair>(); final Document doc = Jsoup.parse(entity.getContent(), "UTF-8", requestBaseUrl); final org.jsoup.nodes.Element form = doc .select("form" + (loginFormId == null ? "" : "[id=" + loginFormId + "]")).first(); final String formAction = form.attr("action"); final HttpPost formPost = new HttpPost(getUrl(requestBaseUrl, formAction)); final Elements formFields = form.select("input"); for (final Element element : formFields) { final String fieldName = element.attr("name"); String fieldValue = element.attr("value"); final String fieldId = element.attr("id"); log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", fieldName, fieldValue, fieldId));// w ww .j a va2 s . c om if (fieldId.equalsIgnoreCase(userFormId)) { fieldValue = user; log.debug(String.format("Set formField user='%s'", user)); } else if (fieldId.equalsIgnoreCase(passFormId)) { log.debug("Set formField password='*******'"); fieldValue = password; } if (loginButton != null && element.attr("type").equalsIgnoreCase("submit")) { if (element.attr("name").equalsIgnoreCase(loginButton)) { formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } } else { formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } } formPost.setEntity(new UrlEncodedFormEntity(formNvps, "UTF-8")); return formPost; }
From source file:de.geeksfactory.opacclient.apis.Littera.java
protected void addSortingSearchFields(List<SearchField> fields) throws IOException, JSONException { final String html = httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()); final Document doc = Jsoup.parse(html); for (int i = 0; i < 3; i++) { final Element tr = doc.select("#sort_editor tr.sort_" + i).first(); final DropdownSearchField field = new DropdownSearchField(); field.setMeaning(SearchField.Meaning.ORDER); field.setId("sort_" + i); field.setDisplayName(tr.select("td").first().text()); field.addDropdownValue("", ""); for (final Element option : tr.select(".crit option")) { if (option.hasAttr("selected")) { field.addDropdownValue(0, option.attr("value"), option.text()); } else { field.addDropdownValue(option.attr("value"), option.text()); }//from ww w . j av a 2s . c o m } fields.add(field); } }
From source file:org.epop.dataprovider.googlescholar.GoogleScholarProvider.java
@Override protected List<Literature> parsePage(Reader page) throws DatalayerException { List<Literature> papers = new ArrayList<Literature>(); Document doc = null;// w ww.j ava 2 s . com try { StringBuilder builder = new StringBuilder(); int charsRead = -1; char[] chars = new char[100]; do { charsRead = page.read(chars, 0, chars.length); // if we have valid chars, append them to end of string. if (charsRead > 0) builder.append(chars, 0, charsRead); } while (charsRead > 0); doc = Jsoup.parse(builder.toString()); } catch (IOException e) { e.printStackTrace(); } // for (Document doc : docs) { for (Element article : doc.select(".gs_r")) { try { LiteratureBuilder litBuilder = new LiteratureBuilder(); // type String typeString = article.select(".gs_ct2").text(); if (typeString == null) typeString = ""; if (typeString.equals("[C]")) continue; // skip citations litBuilder.setType(getLiteratureType(typeString)); // title String title = article.select(".gs_rt a").text(); title = title.replaceAll("\u0097", "-"); title = title.replaceAll("", "..."); if (title.isEmpty()) throw new DatalayerException("title retrieved by parsing is empty"); litBuilder.setTitle(title); // website URL if (litBuilder.getWebsiteURLs() == null) litBuilder.setWebsiteURLs(new HashSet<Link>()); try { String linkURL = article.select(".gs_rt a").attr("href"); litBuilder.getWebsiteURLs().add(new Link(linkURL)); } catch (URISyntaxException e2) { // TODO Auto-generated catch block e2.printStackTrace(); } try { // cluster link String googleLinkURL = "http://scholar.google.com" + article.select(".gs_fl .gs_nph").attr("href"); litBuilder.getWebsiteURLs().add(new Link("Google Scholar", googleLinkURL)); // scholar ID Matcher idMatcher = ID_PATTERN.matcher(googleLinkURL); if (idMatcher.find()) litBuilder.setgScholarID(idMatcher.group(1)); // else // TODO error handling } catch (URISyntaxException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } String abstractText = article.select(".gs_rs").text(); litBuilder.setAbstractText(abstractText); String rawHTML = article.select(".gs_a").html(); if (rawHTML.isEmpty()) // no authors continue; // split by " - " (authors - publication, year - publisher) String[] splits = rawHTML.split(" - "); // if (splits.length != 3) // throw new DatalayerException( // "dashTokenizer should have three sections (authors - publication, year - publisher), found " // + splits.length // + "; maybe Google Scholar layout has changed"); String namesHTML = "", publicationHTML = "", publisherHTML = ""; if (splits.length > 0) { namesHTML = splits[0]; namesHTML = namesHTML.replace(", ", ""); } if (splits.length == 2) { publisherHTML = splits[1]; } if (splits.length > 3) { publicationHTML = splits[1]; publisherHTML = splits[2]; } // authors try { List<Author> authors = getAuthorsFromHTMLSection(namesHTML); litBuilder.setAuthors(new HashSet<>(authors)); } catch (PatternMismatchException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } // publication String[] commaSplit = publicationHTML.split(", "); if (commaSplit.length == 2) { String publication = commaSplit[0]; publication = publication.replaceAll("\u0097", "-"); publication = publication.replaceAll("", "..."); litBuilder.setPublicationContext(publication); try { Integer year = Integer.parseInt(commaSplit[1]); litBuilder.setYear(year); } catch (NumberFormatException e) { // throw new ServiceException( // "publicationHTML subsection has invalid format: failed to parse publication year"); // TODO (low) logging } } else { // TODO logging/notify user } // publisher litBuilder.setPublisher(publisherHTML); // citations String citedby = article.select(".gs_fl a[href*=cites]").text(); Matcher cm = CITES_PATTERN.matcher(citedby); try { int cites = cm.find() ? Integer.parseInt(cm.group(1)) : 0; litBuilder.setgScholarNumCitations(cites); } catch (NumberFormatException e) { // TODO } // fulltext String fulltextURL = article.select("div.gs_md_wp.gs_ttss a").attr("href"); Set<Link> fullLinks = new HashSet<>(); try { fullLinks.add(new Link(fulltextURL)); litBuilder.setFulltextURLs(fullLinks); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } papers.add(litBuilder.getObject()); } catch (Exception e) { malformed.add(e.getMessage()); } } // } // if (headerContent.startsWith("User profiles")) { // // only the first part // Element hrefPart = seg.getAllElements().get(0); // String link = hrefPart.getAttributeValue("href"); // assert link.startsWith("/citations"); // String[] data = link.split("[?|&|=]"); // System.out.println("id found for user " + data[2]); // return GoogleScholarGetterFromId.getFromId(data[2]); // // docs.clear(); System.err.println(malformed + " " + malformed.size()); return papers; }
From source file:org.confab.PhpBB3Parser.java
/** * Parses each post for a particular topic. * @param html Html containing the posts to be parsed * @return List of Post objects *//*from ww w . j a va 2 s . c om*/ public List<Post> parsePosts(Document html, ForumThread parent) { Utilities.debug("Starting parsePosts"); List<Post> ret = new ArrayList<Post>(); // Each post should have it's own table Elements div_posts = html.select("div#posts"); assert !div_posts.isEmpty(); Elements posts_table = div_posts.select("table[id~=(post\\d+)]"); assert !posts_table.isEmpty(); for (Element el_post : posts_table) { Post new_post = new Post(parent); // Get post id (id=post\d+) new_post.id = el_post.attr("id").replace("post", "").trim(); assert new_post.id != null; // Get post message Elements el_message = el_post.select("div[id~=(post_message_\\d+)]"); assert !el_message.isEmpty(); new_post.message = el_message.first().text(); assert new_post.message != null; Utilities.debug("new_post.message: " + new_post.message); // Get post author Elements el_author = el_post.select(".bigusername"); assert !el_author.isEmpty(); new_post.author.username = el_author.first().text(); assert new_post.author != null; Utilities.debug("new_post.author: " + new_post.author); ret.add(new_post); } Utilities.debug("Finished parsePosts"); return ret; }
From source file:qhindex.controller.SearchAuthorWorksController.java
private CitingWork extractCitationWork(Element citationElements) { CitingWork citingWork = new CitingWork(); // Extract title(name) and url Elements nameElems = citationElements.select("h3.gs_rt > a"); String urlCitationWork = ""; if (nameElems.size() > 0) { citingWork.setName(nameElems.get(0).text()); urlCitationWork = nameElems.get(0).attr("href"); citingWork.setUrl(urlCitationWork); }//from w w w .ja va2s . c o m // Extract authors, publisher in google/external web/calculated Elements publicationElem = citationElements.select("div.gs_a"); if (publicationElem.size() > 0) { String citationData = publicationElem.get(0).text(); // Get the author data citingWork.setAuthors(divideStringAt(citationData, " - ", true)); // Remove Authors data to extract published in value String citationWithoutAuthors = divideStringAt(citationData, " - ", false); // Remove the year data String publisher = divideStringAt(citationWithoutAuthors, ", ", true); citingWork.setPublisherInGoogle(publisher); // Assumes that only one sequence "..." exist AT THE END to indicate the name is incomplete if (publisher.contains("") && publisher.length() > 5 && urlCitationWork.length() > 0) { try { // Remove the " ..." string at the end String publisherNameIncompleteWithoutDots = publisher.replace("", ""); String resolvedPublisher = resolvePublisher(urlCitationWork, publisherNameIncompleteWithoutDots); citingWork.setPublisherInExternalWeb(resolvedPublisher); publisher = handlePublicationNameCases(resolvedPublisher); } catch (IOException ioEx) { Debug.print("Exception while extracting citing work: " + ioEx.toString()); resultsMsg += "Exception while extracting citing work.\n"; } } else { publisher = removeNonLetterCharsAtBeginning(publisher); publisher = handlePublicationNameCases(publisher); publisher = correctAuthorWorkPublisher(publisher); } citingWork.setPublisher(publisher); } // Extract citation number Elements citationElems = citationElements.select("div.gs_fl > a"); if (citationElems.size() > 0) { String citationNumberData = citationElems.get(0).text(); citationNumberData = citationNumberData.replace("Cited by ", ""); citingWork.setCitationsNumber(citationNumberData); } return citingWork; }
From source file:gov.medicaid.screening.dao.impl.MedicalPracticeLicenseDAOBean.java
/** * Parse the License information./*from w w w . j av a 2 s. c o m*/ * * @param city the license provider city * @param details the details page * @return the parsed license */ private License parseLicense(String city, Document details) { License license = new License(); license.setCity(city); ProviderProfile profile = new ProviderProfile(); license.setProfile(profile); String fullName = details.select("#_ctl7_lblName").text(); User user = new User(); String[] nameParts = fullName.split(","); if (nameParts.length > 0) { user.setLastName(nameParts[0].trim()); } if (nameParts.length > 1) { user.setFirstName(nameParts[1].trim()); } profile.setUser(user); String licenseType = details.select("#_ctl7_ProfileInfoLicense_lblLicType").text(); LicenseType licType = new LicenseType(); licType.setName(licenseType); license.setType(licType); String licenseNo = details.select("#_ctl7_ProfileInfoLicense_lblLicNbr").text(); license.setLicenseNumber(licenseNo); String licensureAddress1 = details.select("#_ctl7_ProfileInfoPublic_lblAddress").text(); String licensureAddress2 = details.select("#_ctl7_ProfileInfoPublic_lblAddress2").text(); String licensureCityState = details.select("#_ctl7_ProfileInfoPublic_lblCity").text(); Address address = new Address(); address.setLocation(licensureAddress1 + " " + licensureAddress2); setCityStateZip(address, licensureCityState); String email = details.select("#_ctl7_ProfileInfoPublic_lblEmail").text(); profile.setContactEmail(email); String birthYear = details.select("#_ctl7_ProfileInfoPublic_lblBirthYear").text(); if (Util.isNotBlank(birthYear)) { profile.setDob(new GregorianCalendar(Integer.parseInt(birthYear), Calendar.JANUARY, 1).getTime()); } String gender = details.select("#_ctl7_ProfileInfoPublic_lblGender").text(); if ("Male".equals(gender)) { profile.setSex(Sex.MALE); } else if ("Female".equals(gender)) { profile.setSex(Sex.FEMALE); } String expirationDate = details.select("#_ctl7_ProfileInfoLicense_lblExpDate").text(); String originalIssueDate = details.select("#_ctl7_ProfileInfoLicense_lblGrantDate").text(); Date issueDate = parseDate(originalIssueDate, DATE_FORMAT); if (issueDate != null) { license.setOriginalIssueDate(issueDate); } Date expireDate = parseDate(expirationDate, DATE_FORMAT); if (expireDate != null) { license.setExpireDate(expireDate); } String licenseStatus = details.select("#_ctl7_ProfileInfoLicense_lblLicStatus").text(); LicenseStatus status = new LicenseStatus(); status.setName(licenseStatus); license.setStatus(status); String disciplinaryAction = details.select("#_ctl7_ProfileInfoLicense_lblDiscAction").text(); String correctiveAction = details.select("#_ctl7_ProfileInfoLicense_lblCorrAction").text(); license.setDiscipline(!"No".equals(disciplinaryAction.trim())); license.setCorrectiveAction(!"No".equals(correctiveAction.trim())); String medSchool = details.select("#_ctl7_ProfileInfoEducation_lblName").text(); MedicalSchool medicalSchool = new MedicalSchool(); medicalSchool.setName(medSchool); license.setMedicalSchool(medicalSchool); String degree = details.select("#_ctl7_ProfileInfoEducation_lblDegree").text(); if ("PhD".equals(degree.trim())) { profile.setDegree(Degree.DOCTORATE); } else if (!Util.isBlank(degree)) { profile.setDegree(Degree.MASTER); } PrivatePractice privatePractice = new PrivatePractice(); profile.setPrivatePractice(privatePractice); String primaryAddressName = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryName").text(); String primaryAddress1 = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryAddress").text(); String primaryCityState = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryAddress2").text(); String primaryPhone = details.select("#_ctl7_ProfileInfoPractices_lblPrimaryPhone").text(); if (Util.isNotBlank(primaryAddressName) || Util.isNotBlank(primaryAddress1) || Util.isNotBlank(primaryCityState)) { Address primary = new Address(); address.setLocation(primaryAddressName + " " + primaryAddress1); setCityStateZip(primary, primaryCityState); privatePractice.setOfficeAddress(primary); } privatePractice.setOfficePhoneNumber(primaryPhone); String secondaryAddressName = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryName").text(); String secondaryAddress1 = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryAddress").text(); String secondaryCityState = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryAddress2").text(); String secondaryPhone = details.select("#_ctl7_ProfileInfoPractices_lblSecondaryPhone").text(); if (Util.isNotBlank(secondaryAddressName) || Util.isNotBlank(secondaryAddress1) || Util.isNotBlank(secondaryCityState)) { Address secondary = new Address(); address.setLocation(secondaryAddressName + " " + secondaryAddress1); setCityStateZip(secondary, secondaryCityState); privatePractice.setSecondaryAddress(secondary); } privatePractice.setSecondaryPhoneNumber(secondaryPhone); Elements specialties = details.select("#_ctl7_ProfileInfoSpecialty_dgSpecialty tr:gt(0)"); List<Specialty> sps = new ArrayList<Specialty>(); for (Element element : specialties) { Specialty sp = new Specialty(); SpecialtyType spt = new SpecialtyType(); spt.setName(element.select("td:eq(0)").text()); sp.setType(spt); sp.setName(element.select("td:eq(1)").text()); sps.add(sp); } profile.setSpecialties(sps); return license; }
From source file:com.johan.vertretungsplan.parser.UntisCommonParser.java
/** * Parst eine Vertretungstabelle eines Untis-Vertretungsplans * /*from w w w . j a v a 2s . c om*/ * @param table * das <code>table</code>-Element des HTML-Dokuments, das geparst * werden soll * @param data * Daten von der Schule (aus <code>Schule.getData()</code>) * @param tag * der {@link VertretungsplanTag} in dem die Vertretungen * gespeichert werden sollen * @throws JSONException */ protected void parseVertretungsplanTable(Element table, JSONObject data, VertretungsplanTag tag) throws JSONException { if (data.optBoolean("class_in_extra_line")) { for (Element element : table.select("td.inline_header")) { String className = getClassName(element.text(), data); if (isValidClass(className)) { KlassenVertretungsplan kv = new KlassenVertretungsplan(className); Element zeile = null; try { zeile = element.parent().nextElementSibling(); if (zeile.select("td") == null) { zeile = zeile.nextElementSibling(); } while (zeile != null && !zeile.select("td").attr("class").equals("list inline_header")) { Vertretung v = new Vertretung(); int i = 0; for (Element spalte : zeile.select("td")) { if (!hasData(spalte.text())) { i++; continue; } String type = data.getJSONArray("columns").getString(i); if (type.equals("lesson")) v.setLesson(spalte.text()); else if (type.equals("subject")) v.setSubject(spalte.text()); else if (type.equals("previousSubject")) v.setPreviousSubject(spalte.text()); else if (type.equals("type")) v.setType(spalte.text()); else if (type.equals("type-entfall")) { if (spalte.text().equals("x")) v.setType("Entfall"); else v.setType("Vertretung"); } else if (type.equals("room")) v.setRoom(spalte.text()); else if (type.equals("teacher")) v.setTeacher(spalte.text()); else if (type.equals("previousTeacher")) v.setPreviousTeacher(spalte.text()); else if (type.equals("desc")) v.setDesc(spalte.text()); else if (type.equals("desc-type")) { v.setDesc(spalte.text()); v.setType(recognizeType(spalte.text())); } else if (type.equals("previousRoom")) v.setPreviousRoom(spalte.text()); i++; } if (v.getDesc() != null && v.getLesson() == null && v.getPreviousRoom() == null && v.getPreviousSubject() == null && v.getPreviousTeacher() == null && v.getRoom() == null && v.getSubject() == null && v.getTeacher() == null && v.getType() == null) { // Beschreibung aus der letzten Zeile fortgesetzt Vertretung previousVertretung = kv.getVertretung() .get(kv.getVertretung().size() - 1); previousVertretung.setDesc(previousVertretung.getDesc() + " " + v.getDesc()); zeile = zeile.nextElementSibling(); continue; } if (v.getType() == null) v.setType("Vertretung"); if (!v.getLesson().equals("")) { kv.add(v); } zeile = zeile.nextElementSibling(); } tag.getKlassen().put(className, kv); } catch (Throwable e) { e.printStackTrace(); } } } } else { boolean hasType = false; for (int i = 0; i < data.getJSONArray("columns").length(); i++) { if (data.getJSONArray("columns").getString(i).equals("type")) hasType = true; } Vertretung previousVertretung = null; for (Element zeile : table.select("tr.list.odd:not(:has(td.inline_header)), " + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]:has(font[color]))")) { Vertretung v = new Vertretung(); String klassen = ""; int i = 0; for (Element spalte : zeile.select("td")) { if (!hasData(spalte.text())) { i++; continue; } String type = data.getJSONArray("columns").getString(i); if (type.equals("lesson")) v.setLesson(spalte.text()); else if (type.equals("subject")) v.setSubject(spalte.text()); else if (type.equals("previousSubject")) v.setPreviousSubject(spalte.text()); else if (type.equals("type")) v.setType(spalte.text()); else if (type.equals("type-entfall")) { if (spalte.text().equals("x")) v.setType("Entfall"); else if (!hasType) v.setType("Vertretung"); } else if (type.equals("room")) v.setRoom(spalte.text()); else if (type.equals("previousRoom")) v.setPreviousRoom(spalte.text()); else if (type.equals("desc")) v.setDesc(spalte.text()); else if (type.equals("desc-type")) { v.setDesc(spalte.text()); v.setType(recognizeType(spalte.text())); } else if (type.equals("teacher")) v.setTeacher(spalte.text()); else if (type.equals("previousTeacher")) v.setPreviousTeacher(spalte.text()); else if (type.equals("class")) klassen = getClassName(spalte.text(), data); i++; } if (v.getDesc() != null && v.getLesson() == null && v.getPreviousRoom() == null && v.getPreviousSubject() == null && v.getPreviousTeacher() == null && v.getRoom() == null && v.getSubject() == null && v.getTeacher() == null && v.getType() == null && previousVertretung != null) { // Beschreibung aus der letzten Zeile fortgesetzt previousVertretung.setDesc(previousVertretung.getDesc() + " " + v.getDesc()); continue; } if (v.getType() == null) { if (zeile.select("strike").size() > 0 || (v.getSubject() == null && v.getRoom() == null && v.getTeacher() == null && v.getPreviousSubject() != null)) v.setType("Entfall"); else v.setType("Vertretung"); } List<String> affectedClasses; // Detect things like "5-12" Pattern pattern = Pattern.compile("(\\d+) ?- ?(\\d+)"); Matcher matcher = pattern.matcher(klassen); if (matcher.find()) { affectedClasses = new ArrayList<String>(); int min = Integer.parseInt(matcher.group(1)); int max = Integer.parseInt(matcher.group(2)); try { for (String klasse : getAllClasses()) { Pattern pattern2 = Pattern.compile("\\d+"); Matcher matcher2 = pattern2.matcher(klasse); if (matcher2.find()) { int num = Integer.parseInt(matcher2.group()); if (min <= num && num <= max) affectedClasses.add(klasse); } } } catch (IOException e) { e.printStackTrace(); } } else { if (data.optBoolean("classes_separated", true)) { affectedClasses = Arrays.asList(klassen.split(", ")); } else { affectedClasses = new ArrayList<String>(); try { for (String klasse : getAllClasses()) { // TODO: // Gibt es // eine // bessere // Mglichkeit? StringBuilder regex = new StringBuilder(); for (char character : klasse.toCharArray()) { regex.append(character); regex.append(".*"); } if (klassen.matches(regex.toString())) affectedClasses.add(klasse); } } catch (IOException e) { e.printStackTrace(); } } } for (String klasse : affectedClasses) { if (isValidClass(klasse)) { KlassenVertretungsplan kv = tag.getKlassen().get(klasse); if (kv == null) kv = new KlassenVertretungsplan(klasse); kv.add(v); tag.getKlassen().put(klasse, kv); } } previousVertretung = v; } } if (data.optBoolean("sort_classes")) { List<KlassenVertretungsplan> list = new ArrayList<>(tag.getKlassen().values()); Collections.sort(list, new Comparator<KlassenVertretungsplan>() { @Override public int compare(KlassenVertretungsplan o1, KlassenVertretungsplan o2) { return o1.getKlasse().compareTo(o2.getKlasse()); } }); LinkedHashMap<String, KlassenVertretungsplan> hashMap = new LinkedHashMap<>(); for (KlassenVertretungsplan klasse : list) { hashMap.put(klasse.getKlasse(), klasse); } tag.setKlassen(hashMap); } }