List of usage examples for org.jsoup.nodes.Element.child
public Element child(int index)
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
static List<ReservedItem> parse_reslist(Document doc) { List<ReservedItem> reservations = new ArrayList<>(); Elements copytrs = doc.select(".data tr"); int trs = copytrs.size(); if (trs <= 1) { return null; }//from www . j a va 2 s . com for (int i = 1; i < trs; i++) { Element tr = copytrs.get(i); ReservedItem item = new ReservedItem(); if (tr.text().contains("keine Daten") || tr.children().size() == 1) { return null; } item.setTitle(tr.child(2).select("b, strong").text().trim()); try { String[] rowsplit2 = tr.child(2).html().split("<br[ /]*>"); String[] rowsplit3 = tr.child(3).html().split("<br[ /]*>"); if (rowsplit2.length > 1) item.setAuthor(rowsplit2[1].trim()); if (rowsplit3.length > 2) item.setBranch(rowsplit3[2].trim()); if (rowsplit3.length > 2) { item.setStatus(rowsplit3[0].trim() + " (" + rowsplit3[1].trim() + ")"); } } catch (Exception e) { e.printStackTrace(); } reservations.add(item); } return reservations; }
From source file:org.abondar.experimental.eventsearch.EventFinder.java
/**
 * Downloads {@code https://afisha.yandex.ru} + {@code place} and scans its {@code p}
 * elements for place text.
 *
 * <p>Only paragraphs whose second ancestor (index 1 of {@code parents()}) has inner HTML
 * containing {@code "<div style"} are considered. For such a paragraph with more than
 * one child element, the inner HTML of the second child is taken if that child has an
 * {@code href} attribute; for a paragraph with no child elements, the paragraph's own
 * inner HTML is taken. Each hit overwrites the previous one, so the last hit wins.
 *
 * @param place path appended to the site root
 * @return the last extracted text followed by {@code " ?"}, or an empty string when
 *         nothing matched or the download failed (the failure is logged at SEVERE)
 */
public String getEventPlaces(String place) {
    String result = "";
    try {
        Document page = Jsoup.connect("https://afisha.yandex.ru" + place).get();
        for (Element paragraph : page.select("p")) {
            boolean insideStyledDiv = paragraph.parents().get(1).html().contains("<div style");
            if (!insideStyledDiv) {
                continue;
            }
            int childCount = paragraph.children().size();
            if (childCount > 1) {
                Element secondChild = paragraph.child(1);
                if (secondChild.hasAttr("href")) {
                    result = secondChild.html() + " ?";
                }
            } else if (childCount == 0) {
                result = paragraph.html() + " ?";
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}
From source file:eu.masconsult.bgbanking.banks.sgexpress.SGExpressClient.java
@Override public List<RawBankAccount> getBankAccounts(String authTokenString) throws IOException, ParseException, AuthenticationException { AuthToken authToken = AuthToken.fromJson(authTokenString); String response = loadPageWithAuth(getHttpClient(), authToken, LIST_ACCOUNTS_XML_ID); Document doc = Jsoup.parse(response, BASE_URL); Element content = doc.getElementById("main"); if (content == null) { throw new ParseException("getBankAccounts: can't find #main"); }//ww w . j av a 2 s . c o m Elements tables = content.select("section.result table.data"); if (tables == null || tables.size() == 0) { throw new ParseException("getBankAccounts: can't find table section.result table.data"); } Elements rows = tables.first().getElementsByTag("tr"); if (rows == null || rows.size() == 0) { throw new ParseException("getBankAccounts: first table is empty"); } ArrayList<RawBankAccount> bankAccounts = new ArrayList<RawBankAccount>(rows.size()); String type = "undef"; for (Element row : rows) { if (row.getElementsByTag("th").size() > 0) { // header row type = row.child(0).text(); } else { RawBankAccount bankAccount = obtainBankAccountFromHtmlTableRow(type, row); if (bankAccount != null) { bankAccounts.add(bankAccount); } } } return bankAccounts; }
From source file:de.geeksfactory.opacclient.apis.WebOpacAt.java
@Override public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException { if (!initialised) { start();// ww w. j ava 2 s .co m } final String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding()); final Document doc = Jsoup.parse(html); final Element detailData = doc.select(".detailData").first(); final Element detailTable = detailData.select("table.titel").first(); final Element availabilityTable = doc.select(".bibliothek table").first(); final DetailledItem result = new DetailledItem(); final Copy copy = new Copy(); result.addCopy(copy); result.setId(id); result.setCover(getCover(doc)); result.setTitle(detailData.select("h3").first().text()); result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media"))); copy.setStatus(getCellContent(availabilityTable, "Verfgbar|Available")); copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent"))); copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations")); for (final Element tr : detailTable.select("tr")) { final String desc = tr.child(0).text(); final String content = tr.child(1).text(); if (desc != null && !desc.trim().isEmpty()) { result.addDetail(new Detail(desc, content)); } else if (!result.getDetails().isEmpty()) { final Detail lastDetail = result.getDetails().get(result.getDetails().size() - 1); lastDetail.setHtml(true); lastDetail.setContent(lastDetail.getContent() + "\n" + content); } } return result; }
From source file:de.geeksfactory.opacclient.apis.Littera.java
@Override public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException { if (!initialised) { start();// w w w . ja va2 s . c om } final String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding()); final Document doc = Jsoup.parse(html); final Element detailData = doc.select(".detailData").first(); final Element detailTable = detailData.select("table.titel").first(); final Element availabilityTable = doc.select(".bibliothek table").first(); final DetailledItem result = new DetailledItem(); final Copy copy = new Copy(); result.addCopy(copy); result.setId(id); result.setCover(getCover(doc)); result.setTitle(detailData.select("h3").first().text()); result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media"))); copy.setStatus(getCellContent(availabilityTable, "Verfgbar|Available")); copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent"))); copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations")); for (final Element tr : detailTable.select("tr")) { final String desc = tr.child(0).text(); final String content = tr.child(1).text(); if (desc != null && !desc.trim().equals("")) { result.addDetail(new Detail(desc, content)); } else if (!result.getDetails().isEmpty()) { final Detail lastDetail = result.getDetails().get(result.getDetails().size() - 1); lastDetail.setHtml(true); lastDetail.setContent(lastDetail.getContent() + "\n" + content); } } return result; }
From source file:eu.masconsult.bgbanking.banks.dskbank.DskClient.java
private RawBankAccount obtainBankAccountFromHtmlTableRow(Element row) { // skip title rows if (row.children().size() != 4) { return null; }//from w w w. j av a 2 s .c o m // skip header if (row.hasClass("td-header")) { return null; } String onclick = row.child(0).child(0).attr("onclick"); Matcher matcher = PATTERN_MATCH_BANK_ACCOUNT_ID.matcher(onclick); if (!matcher.find()) { throw new ParseException("can't find bank account id in " + onclick); } return new RawBankAccount().setServerId(matcher.group(1)).setName(row.child(0).text()) .setIBAN(row.child(1).text()).setCurrency(row.child(2).text()) .setBalance(Convert.strToFloat(row.child(3).text())) .setAvailableBalance(Convert.strToFloat(row.child(3).text())); }
From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java
/** * ???//from ww w . ja v a2 s .co m * * @param eleTrs * @param rowNo * @return */ private String parseDetailTr(Element eleTr) throws Exception { Element eleTd = eleTr.select("td").get(1); // td if (eleTd.children().size() > 0) { return eleTd.child(0).html(); } else { return eleTd.html().trim(); } }
From source file:edu.harvard.iq.safe.lockss.impl.LOCKSSPlatformStatusHtmlParser.java
/** * * @param is/* w ww.ja v a2 s.co m*/ */ @Override public void getPlatformStatusData(InputStream is) { try { Document doc = DataUtil.load(is, "UTF-8", ""); Element body = doc.body(); // most of the target items are sandwitched by <b> tag // this can be used to reach each target item. String tmpCurrentTime = null; String tmpUpTime = null; String currentTime = null; Elements tags = body.getElementsByTag("b"); for (Element tag : tags) { // get the current-time string: for 1.52.3 or older daemons // this is the ony place to get it. String tagText = tag.text(); logger.log(Level.FINE, "working on tagText={0}", tagText); if (tagText.equals("Daemon Status")) { // find current time and up running currentTime = tag.parent().parent().text(); logger.log(Level.INFO, "currentTime text=[{0}]", currentTime); // "currentTime =Daemon Status lockss.statelib.lib.in.us (usdocspln group) 01:25:55 03/01/12, up 7d5h21m" tmstmpMatcher = currentTimeStampPattern.matcher(currentTime); if (tmstmpMatcher.find()) { logger.log(Level.INFO, "group 0={0}", tmstmpMatcher.group(0)); tmpCurrentTime = tmstmpMatcher.group(1); logger.log(Level.INFO, "Current Time:group 1={0}", tmpCurrentTime); tmpUpTime = tmstmpMatcher.group(2); logger.log(Level.INFO, "UpTime:group 2={0}", tmpUpTime); } } // get the remaining key-value sets if (fieldNameSet.contains(tagText)) { Element parent = tag.parent(); String fieldValue = parent.nextElementSibling().text(); logger.log(Level.FINE, "{0}={1}", new Object[] { tagText, fieldValue }); summaryInfoMap.put(tagText, fieldValue); } } // extract the daemon version and platform info that are located // at the bottom // these data are sandwitched by a <center> tag Elements ctags = body.getElementsByTag("center"); String version = null; String platform = null; for (Element ctag : ctags) { String cText = ctag.text(); logger.log(Level.FINE, "center tag Text={0}", cText); // cText is like this: // Daemon 1.53.3 built 28-Jan-12 01:06:36 on build7.lockss.org, Linux RPM 1 if 
(StringUtils.isNotBlank(cText) && ctag.child(0).nodeName().equals("font")) { String[] versionPlatform = cText.split(", "); if (versionPlatform.length == 2) { logger.log(Level.INFO, "daemon version={0};platform={1}", versionPlatform); version = DaemonStatusDataUtil.getDaemonVersion(versionPlatform[0]); platform = versionPlatform[1]; } else { // the above regex failed logger.log(Level.WARNING, "String-formatting differs; use pattern matching"); version = DaemonStatusDataUtil.getDaemonVersion(cText); int platformOffset = cText.lastIndexOf(", ") + 2; platform = cText.substring(platformOffset); logger.log(Level.INFO, "platform={0}", platform); } } } if (summaryInfoMap.containsKey("V3 Identity")) { String ipAddress = DaemonStatusDataUtil.getPeerIpAddress(summaryInfoMap.get("V3 Identity")); logger.log(Level.INFO, "ipAddress={0}", ipAddress); if (StringUtils.isNotBlank(ipAddress)) { boxInfoMap.put("host", ipAddress); if (!ipAddress.equals(summaryInfoMap.get("IP Address"))) { summaryInfoMap.put("IP Address", ipAddress); } } else { logger.log(Level.WARNING, "host token is blank or null: use IP Address instead"); logger.log(Level.INFO, "IP Address={0}", summaryInfoMap.get("IP Address")); boxInfoMap.put("host", summaryInfoMap.get("IP Address")); } } // for pre-1.53.3 versions boxInfoMap.put("time", tmpCurrentTime); if (!summaryInfoMap.containsKey("Current Time")) { summaryInfoMap.put("Current Time", tmpCurrentTime); } boxInfoMap.put("up", tmpUpTime); if (!summaryInfoMap.containsKey("Uptime")) { summaryInfoMap.put("Uptime", tmpUpTime); } boxInfoMap.put("version", version); if (!summaryInfoMap.containsKey("Daemon Version")) { summaryInfoMap.put("Daemon Version", version); } boxInfoMap.put("platform", platform); if (!summaryInfoMap.containsKey("Platform")) { summaryInfoMap.put("Platform", platform); } } catch (IOException ex) { logger.log(Level.SEVERE, "IO error", ex); } logger.log(Level.INFO, "boxInfoMap={0}", boxInfoMap); logger.log(Level.INFO, "summaryInfo={0}", 
summaryInfoMap); }
From source file:eu.masconsult.bgbanking.banks.sgexpress.SGExpressClient.java
private RawBankAccount obtainBankAccountFromHtmlTableRow(String type, Element row) { if ("detail".equalsIgnoreCase(row.attr("class"))) { // detail row return null; }// w ww .j a v a 2 s. co m if ("bg0".equalsIgnoreCase(row.attr("class"))) { Log.v(TAG, "working row(" + type + "): " + row.html()); if ("Current Accounts".equalsIgnoreCase(type)) { return new RawBankAccount().setServerId(row.child(2).text()).setName(row.child(0).child(0).text()) .setIBAN(row.child(2).text()).setCurrency(row.child(1).text()) .setBalance(Convert.strToFloat(row.child(3).text())) .setAvailableBalance(Convert.strToFloat(row.child(4).text())); } else if ("Cards".equalsIgnoreCase(type)) { // skip cards for now return null; } else { // unknown type return null; } } else { return null; } }