List of usage examples for org.jsoup.nodes Element child
public Element child(int index)
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
public static AccountData parse_account(Account acc, Document doc, JSONObject data) throws JSONException { JSONObject copymap = data.getJSONObject("accounttable"); List<LentItem> media = new ArrayList<>(); if (doc.select(".kontozeile_center table").size() == 0) { return null; }// w w w .j av a 2 s . c o m Elements exemplartrs = doc.select(".kontozeile_center table").get(0).select("tr.tabKonto"); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); for (int i = 0; i < exemplartrs.size(); i++) { Element tr = exemplartrs.get(i); LentItem item = new LentItem(); Iterator<?> keys = copymap.keys(); while (keys.hasNext()) { String key = (String) keys.next(); int index; try { index = copymap.has(key) ? copymap.getInt(key) : -1; } catch (JSONException e1) { index = -1; } if (index >= 0) { if (key.equals("prolongurl")) { if (tr.child(index).children().size() > 0) { item.setProlongData(tr.child(index).child(0).attr("href")); item.setRenewable(tr.child(index).child(0).attr("href").contains("vermsg")); } } else if (key.equals("returndate")) { try { item.setDeadline(fmt.parseLocalDate(tr.child(index).text())); } catch (IllegalArgumentException e1) { e1.printStackTrace(); } } else { item.set(key, tr.child(index).text()); } } } media.add(item); } assert (doc.select(".kontozeile_center table").get(0).select("tr").size() > 0); assert (exemplartrs.size() == media.size()); copymap = data.getJSONObject("reservationtable"); List<ReservedItem> reservations = new ArrayList<>(); exemplartrs = doc.select(".kontozeile_center table").get(1).select("tr.tabKonto"); for (int i = 0; i < exemplartrs.size(); i++) { Element tr = exemplartrs.get(i); ReservedItem item = new ReservedItem(); Iterator<?> keys = copymap.keys(); while (keys.hasNext()) { String key = (String) keys.next(); int index; try { index = copymap.has(key) ? 
copymap.getInt(key) : -1; } catch (JSONException e1) { index = -1; } if (index >= 0) { if (key.equals("cancelurl")) { if (tr.child(index).children().size() > 0) { item.setCancelData(tr.child(index).child(0).attr("href")); } } else if (key.equals("availability")) { try { item.setReadyDate(fmt.parseLocalDate(tr.child(index).text())); } catch (IllegalArgumentException e1) { item.setStatus(tr.child(index).text()); } } else if (key.equals("expirationdate")) { try { item.setExpirationDate(fmt.parseLocalDate(tr.child(index).text())); } catch (IllegalArgumentException e1) { item.setStatus(tr.child(index).text()); } } else { item.set(key, tr.child(index).text()); } } } reservations.add(item); } assert (doc.select(".kontozeile_center table").get(1).select("tr").size() > 0); assert (exemplartrs.size() == reservations.size()); AccountData res = new AccountData(acc.getId()); for (Element row : doc.select(".kontozeile_center, div[align=center]")) { String text = row.text().trim(); if (text.matches(".*Ausstehende Geb.+hren:[^0-9]+([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr.).*")) { text = text.replaceAll( ".*Ausstehende Geb.+hren:[^0-9]+([0-9.," + "]+)[^0-9A-Z]*(|EUR|CHF|Fr.).*", "$1 $2"); res.setPendingFees(text); } if (text.matches("Ihr Ausweis ist g.ltig bis:.*")) { text = text.replaceAll("Ihr Ausweis ist g.ltig bis:[^A-Za-z0-9]+", ""); res.setValidUntil(text); } else if (text.matches("Ausweis g.ltig bis:.*")) { text = text.replaceAll("Ausweis g.ltig bis:[^A-Za-z0-9]+", ""); res.setValidUntil(text); } } res.setLent(media); res.setReservations(reservations); return res; }
From source file:mobi.jenkinsci.ci.client.JenkinsClient.java
/**
 * Collects the issues found in the rows of a changes table, keyed by commit id.
 *
 * <p>The rows are the children of the table's first child element. Rows for which no issue is
 * found are skipped; rows whose issue URL is malformed are logged and skipped.
 *
 * @param changesTable the changes table element, may be {@code null}
 * @return a map from commit id to issue; empty if the table is {@code null} or has no children
 */
private HashMap<String, Issue> getIssuesFromTable(final Element changesTable) {
    final HashMap<String, Issue> result = new HashMap<String, ChangeSetItem.Issue>();

    if (changesTable == null) {
        return result;
    }
    if (changesTable.children().isEmpty()) {
        LOG.warn("Cannot find changes TBODY");
        return result;
    }

    for (final Element row : changesTable.child(0).children()) {
        final String commitId = getCommitIdFromRow(row);
        try {
            final Issue found = getIssueFromRow(row);
            if (found != null) {
                result.put(commitId, found);
            }
        } catch (final MalformedURLException e) {
            LOG.warn("Invalid issue URL for row " + row.toString() + ": skipping", e);
        }
    }
    return result;
}
From source file:com.laudandjolynn.mytv.crawler.tvmao.TvMaoCrawler.java
/**
 * Extracts the TV stations listed on a crawled channel page.
 *
 * <p>The classification name is read from the first {@code div.chlsnav div.pbar b} element, and
 * one station is created per {@code div.chlsnav ul.r li} entry, named after the text of the
 * entry's first child element. Every registered listener is notified of each station before it
 * is added to the result.
 *
 * @param city city to assign to every station found
 * @param html markup of the page to parse
 * @return the stations found, in document order
 */
private List<TvStation> parseTvStation(String city, String html) {
    Document page = Jsoup.parse(html);
    String classify = page.select("div.chlsnav div.pbar b").get(0).text().trim();

    List<TvStation> stations = new ArrayList<TvStation>();
    Elements entries = page.select("div.chlsnav ul.r li");
    for (int idx = 0; idx < entries.size(); idx++) {
        Element channel = entries.get(idx).child(0);

        TvStation station = new TvStation();
        station.setName(channel.text().trim());
        station.setCity(city);
        station.setClassify(classify);
        station.setSequence(SEQUENCE.incrementAndGet());

        for (CrawlEventListener listener : listeners) {
            listener.itemFound(new TvStationFoundEvent(this, station));
        }
        stations.add(station);
    }
    return stations;
}
From source file:com.laudandjolynn.mytv.crawler.tvmao.TvMaoCrawler.java
@Override public boolean exists(TvStation station) { String city = station.getCity(); String classify = station.getClassify(); if (city == null || classify == null) { return false; }//from ww w .jav a2s .c o m String tvMaoFile = getCrawlFilePath(station); File file = new File(tvMaoFile); if (file.exists()) { String html = null; try { html = MyTvUtils.readAsHtml(tvMaoFile); } catch (IOException e) { return false; } Document doc = Jsoup.parse(html); Elements classifyElements = doc.select("div.chlsnav div.pbar b"); String classifyName = classifyElements.get(0).text().trim(); Elements channelElements = doc.select("div.chlsnav ul.r li"); for (Element element : channelElements) { Element channel = element.child(0); String stationName = channel.text().trim(); if (stationName.equals(station.getName()) && classifyName.equals(classify)) { return true; } } return false; } HtmlPage htmlPage = (HtmlPage) WebCrawler.crawl(TV_MAO_URL); try { if ((htmlPage = searchStation(htmlPage, station)) != null) { MyTvUtils.outputCrawlData(getCrawlerName(), htmlPage.asXml(), getCrawlFileName(city, classify)); return true; } } catch (Exception e) { logger.error("error occur while search station: " + station.getName(), e); } return false; }
From source file:de.geeksfactory.opacclient.apis.Heidi.java
protected List<ReservedItem> parse_reservations(String html) { Document doc = Jsoup.parse(html); List<ReservedItem> reservations = new ArrayList<>(); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); for (Element tr : doc.select("table.kontopos tr")) { ReservedItem item = new ReservedItem(); Element desc = tr.child(1).select("label").first(); Element pos = tr.child(3); if (tr.child(1).select("a").size() > 0) { String kk = getQueryParamsFirst(tr.child(1).select("a").first().absUrl("href")).get("katkey"); item.setId(kk);// w ww.j av a 2 s . com } if (tr.child(0).select("input").size() > 0) { item.setCancelData(tr.child(0).select("input").first().val()); } int i = 0; for (Node node : desc.childNodes()) { if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (i == 0) { item.setAuthor(text); } else if (i == 1) { item.setTitle(text); } i++; } } i = 0; for (Node node : pos.childNodes()) { if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (i == 0 && text.contains("")) { try { item.setReadyDate(fmt.parseLocalDate(text)); } catch (IllegalArgumentException e) { item.setStatus(text); } } else if (i == 1) { item.setBranch(text); } i++; } } reservations.add(item); } return reservations; }
From source file:de.geeksfactory.opacclient.apis.Heidi.java
/**
 * Logs in and loads the account data: pending fees, lent items and reservations.
 *
 * <p>Lent items are read from the rows of {@code table.kontopos} on the account page.
 * Reservations are collected from two further pages ({@code konto=v} and {@code konto=b}) via
 * {@link #parse_reservations(String)}.
 *
 * <p>A lent item whose date cell does not contain a parseable {@code dd.MM.yyyy} date is kept
 * without a deadline.
 *
 * @param account the account to load
 * @return the account data
 * @throws IOException        declared by the overridden method; see {@code httpGet}
 * @throws JSONException      declared by the overridden method
 * @throws OpacErrorException declared by the overridden method; see {@code login}
 */
@Override
public AccountData account(Account account) throws IOException, JSONException, OpacErrorException {
    login(account);

    AccountData adata = new AccountData(account.getId());
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    String html = httpGet(opac_url + "/konto.cgi?sess=" + sessid, getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    // Needed so that absUrl("href") below can resolve relative links.
    doc.setBaseUri(opac_url + "/");

    for (Element td : doc.select("table.konto td")) {
        if (td.text().contains("Offene")) {
            String text = td.text().trim().replaceAll(
                    "Offene[^0-9]+Geb.+hren:[^0-9]+([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr.)", "$1 $2");
            adata.setPendingFees(text);
        }
    }

    List<LentItem> lent = new ArrayList<>();
    for (Element tr : doc.select("table.kontopos tr")) {
        LentItem item = new LentItem();
        Element desc = tr.child(1).select("label").first();
        String dates = tr.child(2).text().trim();

        Element link = tr.child(1).select("a").first();
        if (link != null) {
            String kk = getQueryParamsFirst(link.absUrl("href")).get("katkey");
            item.setId(kk);
        }

        // first() returns null when the cell has no <label>; skip the description then
        // instead of failing with a NullPointerException.
        if (desc != null) {
            int i = 0;
            for (Node node : desc.childNodes()) {
                if (node instanceof TextNode) {
                    String text = ((TextNode) node).text().trim();
                    if (i == 0) {
                        item.setAuthor(text);
                    } else if (i == 1) {
                        item.setTitle(text);
                    } else if (text.contains("Mediennummer")) {
                        item.setBarcode(text.replace("Mediennummer: ", ""));
                    }
                    i++;
                }
            }
        }

        Elements prolongInputs = tr.child(0).select("input");
        if (prolongInputs.size() == 1) {
            item.setProlongData(prolongInputs.first().val());
            item.setRenewable(true);
        } else {
            // Without a single input the class of the first <span> is stored instead;
            // a missing span yields an empty string rather than a NullPointerException.
            Element span = tr.child(0).select("span").first();
            item.setProlongData(span != null ? span.attr("class") : "");
            item.setRenewable(false);
        }

        // The cell may hold a "from - to" range; the part after the first "-" is the due date.
        String todate = dates;
        if (todate.contains("-")) {
            String[] datesplit = todate.split("-");
            if (datesplit.length > 1) {
                todate = datesplit[1].trim();
            }
        }
        // substring(0, 10) on a shorter string throws StringIndexOutOfBoundsException, which
        // the catch below would not handle; such cells are treated like unparseable dates.
        if (todate.length() >= 10) {
            try {
                item.setDeadline(fmt.parseLocalDate(todate.substring(0, 10)));
            } catch (IllegalArgumentException e) {
                // Best effort: an unparseable date leaves the deadline unset.
                e.printStackTrace();
            }
        }

        lent.add(item);
    }
    adata.setLent(lent);

    List<ReservedItem> reservations = new ArrayList<>();
    html = httpGet(opac_url + "/konto.cgi?konto=v&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    html = httpGet(opac_url + "/konto.cgi?konto=b&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    adata.setReservations(reservations);

    return adata;
}
From source file:de.geeksfactory.opacclient.apis.Zones22.java
private DetailledItem parse_result(String id, String html) throws IOException { Document doc = Jsoup.parse(html); DetailledItem result = new DetailledItem(); result.setTitle(""); boolean title_is_set = false; result.setId(id);// w ww .java2 s . c o m Elements detaildiv = doc.select("div.record-item-new"); Elements detailtrs1 = doc.select(".DetailDataCell table table:not(.inRecordHeader) tr"); for (int i = 0; i < detailtrs1.size(); i++) { Element tr = detailtrs1.get(i); int s = tr.children().size(); if (tr.child(0).text().trim().equals("Titel") && !title_is_set) { result.setTitle(tr.child(s - 1).text().trim()); title_is_set = true; } else if (s > 1) { Element valchild = tr.child(s - 1); if (valchild.select("table").isEmpty()) { String val = valchild.text().trim(); if (val.length() > 0) result.addDetail(new Detail(tr.child(0).text().trim(), val)); } } } for (Element a : doc.select("a.SummaryActionLink")) { if (a.text().contains("Vormerken")) { result.setReservable(true); result.setReservation_info(a.attr("href")); } } if (!detaildiv.isEmpty()) { for (int i = 0; i < detaildiv.size(); i++) { Element dd = detaildiv.get(i); String text = ""; for (Node node : dd.childNodes()) { if (node instanceof TextNode) { String snip = ((TextNode) node).text(); if (snip.length() > 0) text += snip; } else if (node instanceof Element) { if (((Element) node).tagName().equals("br")) text += "\n"; else { String snip = ((Element) node).text().trim(); if (snip.length() > 0) text += snip; } } } result.addDetail(new Detail("", text)); } } if (doc.select("span.z3988").size() > 0) { // Sometimes there is a <span class="Z3988"> item which provides // data in a standardized format. 
String z3988data = doc.select("span.z3988").first().attr("title").trim(); for (String pair : z3988data.split("\\&")) { String[] nv = pair.split("=", 2); if (nv.length == 2) { if (!nv[1].trim().equals("")) { if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) { result.setTitle(nv[1]); } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) { result.setTitle(nv[1]); } else if (nv[0].equals("rft.au")) { result.addDetail(new Detail("Author", nv[1])); } } } } } Elements copydivs = doc.select(".DetailDataCell div[id^=stock_]"); String pop = ""; for (int i = 0; i < copydivs.size(); i++) { Element div = copydivs.get(i); if (div.attr("id").startsWith("stock_head")) { pop = div.text().trim(); continue; } Map<String, String> copy = new HashMap<String, String>(); // This is getting very ugly - check if it is valid for libraries // which are not // Hamburg. int j = 0; for (Node node : div.childNodes()) { try { if (node instanceof Element) { if (((Element) node).tag().getName().equals("br")) { copy.put(DetailledItem.KEY_COPY_BRANCH, pop); result.addCopy(copy); j = -1; } else if (((Element) node).tag().getName().equals("b") && j == 1) { copy.put(DetailledItem.KEY_COPY_LOCATION, ((Element) node).text()); } else if (((Element) node).tag().getName().equals("b") && j > 1) { copy.put(DetailledItem.KEY_COPY_STATUS, ((Element) node).text()); } j++; } else if (node instanceof TextNode) { if (j == 0) copy.put(DetailledItem.KEY_COPY_DEPARTMENT, ((TextNode) node).text()); if (j == 2) copy.put(DetailledItem.KEY_COPY_BARCODE, ((TextNode) node).getWholeText().trim().split("\n")[0].trim()); if (j == 6) { String text = ((TextNode) node).text().trim(); copy.put(DetailledItem.KEY_COPY_RETURN, text.substring(text.length() - 10)); } j++; } } catch (Exception e) { e.printStackTrace(); } } } return result; }
From source file:de.geeksfactory.opacclient.apis.Zones.java
private String getValue(Element node) { if (version18) { return node.child(4).text().trim(); } else {//from w w w .j av a2s . c o m return node.select(".SummaryFieldData").text(); } }
From source file:de.geeksfactory.opacclient.apis.Zones.java
/**
 * Returns the label text of a summary field.
 *
 * @param node element holding the field
 * @return the trimmed text of the first child element when {@code version18} is set, otherwise
 *         the text of the {@code .SummaryFieldLegend} descendants
 */
private String getName(Element node) {
    return version18
            ? node.child(0).text().trim()
            : node.select(".SummaryFieldLegend").text();
}
From source file:de.geeksfactory.opacclient.apis.Heidi.java
@Override public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException { String html = httpGet(opac_url + "/konto.cgi?sess=" + sessid + "&email=&verl=Gesamtkontoverlngerung", ENCODING);//from w w w . ja v a 2 s . c o m Document doc = Jsoup.parse(html); if (doc.select("input[name=pw]").size() > 0) { try { login(account); } catch (OpacErrorException e) { return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage()); } return prolongAll(account, useraction, selection); } List<Map<String, String>> result = new ArrayList<>(); Map<String, String> line = new HashMap<>(); for (Element tr : doc.select(".kontobox table tbody tr")) { if (tr.children().size() < 2) { if (line.size() > 0) { line.put(ProlongAllResult.KEY_LINE_MESSAGE, tr.child(0).text().trim()); result.add(line); line = new HashMap<>(); } continue; } String label = tr.child(0).text(); String text = tr.child(1).text().trim(); if (label.contains("Verfasser")) { line.put(ProlongAllResult.KEY_LINE_AUTHOR, text); } else if (label.contains("Titel")) { line.put(ProlongAllResult.KEY_LINE_TITLE, text); } else if (label.contains("Altes Leihfristende")) { line.put(ProlongAllResult.KEY_LINE_OLD_RETURNDATE, text); } else if (label.contains("Neues")) { line.put(ProlongAllResult.KEY_LINE_NEW_RETURNDATE, text); } } return new ProlongAllResult(MultiStepResult.Status.OK, result); }