List of usage examples for the org.jsoup.nodes.Element method select
public Elements select(String cssQuery)
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
@Override public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException { start();/*w w w .ja v a 2 s . c om*/ LoginResponse login = login(acc); if (!login.success) { return null; } AccountData adata = new AccountData(acc.getId()); if (login.warning != null) { adata.setWarning(login.warning); } // Lent media httpGet(opac_url + "/userAccount.do?methodToCall=start", ENCODING); String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=loaned", ENCODING); List<LentItem> lent = new ArrayList<>(); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<LentItem> nextpageLent = parse_medialist(doc); if (nextpageLent != null) { lent.addAll(nextpageLent); } if (doc.select(".pagination").size() > 0 && lent != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageLent = parse_medialist(doc); if (nextpageLent != null) { lent.addAll(nextpageLent); } } } adata.setLent(lent); // Requested media ("Vormerkungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=requested", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<ReservedItem> requested = new ArrayList<>(); List<ReservedItem> nextpageRes = parse_reslist(doc); if (nextpageRes != null) { requested.addAll(nextpageRes); } if (doc.select(".pagination").size() > 0 && requested != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageRes = parse_reslist(doc); if (nextpageRes != null) { requested.addAll(nextpageRes); } } } // Ordered media ("Bestellungen") html = 
httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=ordered", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<ReservedItem> nextpageOrd = parse_reslist(doc); if (nextpageOrd != null) { requested.addAll(nextpageOrd); } if (doc.select(".pagination").size() > 0 && requested != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageOrd = parse_reslist(doc); if (nextpageOrd != null) { requested.addAll(nextpageOrd); } } } adata.setReservations(requested); // Fees if (doc.select("#fees").size() > 0) { String text = doc.select("#fees").first().text().trim(); if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) { text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2"); adata.setPendingFees(text); } } return adata; }
From source file:de.geeksfactory.opacclient.apis.IOpac.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, NotReachableException { Document doc = Jsoup.parse(html); if (doc.select("h4").size() > 0) { if (doc.select("h4").text().trim().startsWith("0 gefundene Medien")) { // nothing found return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); } else if (!doc.select("h4").text().trim().contains("gefundene Medien") && !doc.select("h4").text().trim().contains("Es wurden mehr als")) { // error throw new OpacErrorException(doc.select("h4").text().trim()); }/*from w w w . ja v a 2 s . c o m*/ } else if (doc.select("h1").size() > 0) { if (doc.select("h1").text().trim().contains("RUNTIME ERROR")) { // Server Error throw new NotReachableException("IOPAC RUNTIME ERROR"); } else { throw new OpacErrorException(stringProvider.getFormattedString( StringProvider.UNKNOWN_ERROR_WITH_DESCRIPTION, doc.select("h1").text().trim())); } } else { return null; } updateRechnr(doc); reusehtml = html; results_total = -1; if (doc.select("h4").text().trim().contains("Es wurden mehr als")) { results_total = 200; } else { String resultnumstr = doc.select("h4").first().text(); resultnumstr = resultnumstr.substring(0, resultnumstr.indexOf(" ")).trim(); results_total = Integer.parseInt(resultnumstr); } List<SearchResult> results = new ArrayList<>(); Elements tables = doc.select("table").first().select("tr:has(td)"); Map<String, Integer> colmap = new HashMap<>(); Element thead = doc.select("table").first().select("tr:has(th)").first(); int j = 0; for (Element th : thead.select("th")) { String text = th.text().trim().toLowerCase(Locale.GERMAN); if (text.contains("cover")) { colmap.put("cover", j); } else if (text.contains("titel")) { colmap.put("title", j); } else if (text.contains("verfasser")) { colmap.put("author", j); } else if (text.contains("mtyp")) { colmap.put("category", j); } else if (text.contains("jahr")) { colmap.put("year", j); } else if (text.contains("signatur")) { 
colmap.put("shelfmark", j); } else if (text.contains("info")) { colmap.put("info", j); } else if (text.contains("abteilung")) { colmap.put("department", j); } else if (text.contains("verliehen") || text.contains("verl.")) { colmap.put("returndate", j); } else if (text.contains("anz.res")) { colmap.put("reservations", j); } j++; } if (colmap.size() == 0) { colmap.put("cover", 0); colmap.put("title", 1); colmap.put("author", 2); colmap.put("publisher", 3); colmap.put("year", 4); colmap.put("department", 5); colmap.put("shelfmark", 6); colmap.put("returndate", 7); colmap.put("category", 8); } for (int i = 0; i < tables.size(); i++) { Element tr = tables.get(i); SearchResult sr = new SearchResult(); if (tr.select("td").get(colmap.get("cover")).select("img").size() > 0) { String imgUrl = tr.select("td").get(colmap.get("cover")).select("img").first().attr("src"); sr.setCover(imgUrl); } // Media Type if (colmap.get("category") != null) { String mType = tr.select("td").get(colmap.get("category")).text().trim().replace("\u00a0", ""); if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf( data.getJSONObject("mediatypes").getString(mType.toLowerCase(Locale.GERMAN)))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttypes.get(mType.toLowerCase(Locale.GERMAN))); } } else { sr.setType(defaulttypes.get(mType.toLowerCase(Locale.GERMAN))); } } // Title and additional info String title; String additionalInfo = ""; if (colmap.get("info") != null) { Element info = tr.select("td").get(colmap.get("info")); title = info.select("a[title=Details-Info]").text().trim(); String authorIn = info.text().substring(0, info.text().indexOf(title)); if (authorIn.contains(":")) { authorIn = authorIn.replaceFirst("^([^:]*):(.*)$", "$1"); additionalInfo += " - " + authorIn; } } else { title = tr.select("td").get(colmap.get("title")).text().trim().replace("\u00a0", ""); if (title.contains("(") && title.indexOf("(") > 0) { additionalInfo += 
title.substring(title.indexOf("(")); title = title.substring(0, title.indexOf("(") - 1).trim(); } // Author if (colmap.containsKey("author")) { String author = tr.select("td").get(colmap.get("author")).text().trim().replace("\u00a0", ""); additionalInfo += " - " + author; } } // Publisher if (colmap.containsKey("publisher")) { String publisher = tr.select("td").get(colmap.get("publisher")).text().trim().replace("\u00a0", ""); additionalInfo += " (" + publisher; } // Year if (colmap.containsKey("year")) { String year = tr.select("td").get(colmap.get("year")).text().trim().replace("\u00a0", ""); additionalInfo += ", " + year + ")"; } sr.setInnerhtml("<b>" + title + "</b><br>" + additionalInfo); // Status String status = tr.select("td").get(colmap.get("returndate")).text().trim().replace("\u00a0", ""); SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy", Locale.GERMAN); try { df.parse(status); // this is a return date sr.setStatus(Status.RED); sr.setInnerhtml(sr.getInnerhtml() + "<br><i>" + stringProvider.getString(StringProvider.LENT_UNTIL) + " " + status + "</i>"); } catch (ParseException e) { // this is a different status text String lc = status.toLowerCase(Locale.GERMAN); if ((lc.equals("") || lc.toLowerCase(Locale.GERMAN).contains("onleihe") || lc.contains("verleihbar") || lc.contains("entleihbar") || lc.contains("ausleihbar")) && !lc.contains("nicht")) { sr.setStatus(Status.GREEN); } else { sr.setStatus(Status.YELLOW); sr.setInnerhtml(sr.getInnerhtml() + "<br><i>" + status + "</i>"); } } // In some libraries (for example search for "atelier" in Preetz) // the results are sorted differently than their numbers suggest, so // we need to detect the number ("recno") from the link String link = tr.select("a[href^=/cgi-bin/di.exe?page=]").attr("href"); Map<String, String> params = getQueryParamsFirst(link); if (params.containsKey("recno")) { int recno = Integer.valueOf(params.get("recno")); sr.setNr(recno - 1); } else { // the above should work, but fall back to 
this if it doesn't sr.setNr(10 * (page - 1) + i); } // In some libraries (for example Preetz) we can detect the media ID // here using another link present in the search results Elements idLinks = tr.select("a[href^=/cgi-bin/di.exe?cMedNr]"); if (idLinks.size() > 0) { Map<String, String> idParams = getQueryParamsFirst(idLinks.first().attr("href")); String id = idParams.get("cMedNr"); sr.setId(id); } else { sr.setId(null); } results.add(sr); } return new SearchRequestResult(results, results_total, page); }
From source file:de.geeksfactory.opacclient.apis.Zones.java
/**
 * Loads the account data: fees and validity from the overview page shown after login,
 * then the lent media and the reservations from their respective pages.
 *
 * @param acc the account to log in with
 * @return the account data, or {@code null} if the login returned no document or no link
 *         to the lent media could be found
 * @throws IOException        if one of the HTTP requests fails
 * @throws JSONException      declared for the helpers used
 * @throws OpacErrorException declared for the helpers used
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    Document login = login(acc);
    if (login == null) {
        return null;
    }

    AccountData res = new AccountData(acc.getId());

    String lentLink = null;
    String resLink = null;
    // The item counters next to the links were parsed before but never used; parsing them
    // only added a way to fail on unexpected cell content, so they are no longer read.
    for (Element td : login.select(".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, "
            + ".CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) {
        String section = td.text().trim();
        Element valueCell = td.nextElementSibling();
        if (section.contains("Entliehene Medien")) {
            lentLink = td.select("a").attr("href");
        } else if (section.contains("Vormerkungen")) {
            resLink = td.select("a").attr("href");
        } else if (section.contains("Kontostand")) {
            if (valueCell != null) {
                res.setPendingFees(valueCell.text().trim());
            }
        } else if (section.matches("Ausweis g.ltig bis")) {
            if (valueCell != null) {
                res.setValidUntil(valueCell.text().trim());
            }
        }
    }
    for (Element a : login.select("a.AccountMenuLink")) {
        if (a.text().contains("Ausleihen")) {
            lentLink = a.attr("href");
        } else if (a.text().contains("Vormerkungen")) {
            resLink = a.attr("href");
        }
    }
    if (lentLink == null) {
        return null;
    }

    List<LentItem> lentItems = new ArrayList<>();
    String lentUrl = opac_url + "/" + lentLink.replace("utf-8?Method", "utf-8&Method");
    String lentHtml = httpGet(lentUrl, getDefaultEncoding());
    Document lentDoc = Jsoup.parse(lentHtml);
    lentDoc.setBaseUri(lentUrl);
    loadMediaList(lentDoc, lentItems);
    res.setLent(lentItems);

    // In Koeln, the reservations link doesn't show on the overview page, only on this one
    if (resLink == null) {
        for (Element a : lentDoc.select("a.AccountMenuLink")) {
            if (a.text().contains("Vormerkungen")) {
                resLink = a.attr("href");
            }
        }
    }

    List<ReservedItem> reservedItems = new ArrayList<>();
    if (resLink != null) {
        // Without a link there is nothing to load; previously this requested ".../null"
        String resHtml = httpGet(opac_url + "/" + resLink, getDefaultEncoding());
        Document resDoc = Jsoup.parse(resHtml);
        loadResList(resDoc, reservedItems);
    }
    res.setReservations(reservedItems);

    return res;
}
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException { Document doc = Jsoup.parse(html); if (doc.select("#RefineHitListForm").size() > 0) { // the results are located on a different page loaded via AJAX html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000) + "&hitlistindex=0&exclusionList=", ENCODING); doc = Jsoup.parse(html);// www .j a v a 2 s .c o m } if (doc.select(".nodata").size() > 0) { return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); } doc.setBaseUri(opac_url + "/searchfoo"); int results_total = -1; String resultnumstr = doc.select(".box-header h2").first().text(); if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) { reusehtml = html; throw new OpacErrorException("is_a_redirect"); } else if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } Elements table = doc.select("table.data > tbody > tr"); identifier = null; Elements links = doc.select("table.data a"); boolean haslink = false; for (Element node : links) { if (node.hasAttr("href") & node.attr("href").contains("singleHit.do") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils .parse(new URI(node.attr("href").replace(" ", "%20").replace("&", "&")), ENCODING); for (NameValuePair nv : anyurl) { if (nv.getName().equals("identifier")) { identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } List<SearchResult> results = new ArrayList<>(); for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select(".icn, img[width=32]").size() > 0) { String[] fparts = tr.select(".icn, img[width=32]").first().attr("src").split("/"); String fname = fparts[fparts.length - 1]; 
String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "") .replace(".png", ""); // File names can look like this: "20_DVD_Video.gif" Pattern pattern = Pattern.compile("(\\d+)_.*"); Matcher matcher = pattern.matcher(changedFname); if (matcher.find()) { changedFname = matcher.group(1); } MediaType defaulttype = defaulttypes.get(changedFname); if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttype); } } else { sr.setType(defaulttype); } } String title; String text; if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen title = tr.select(".title a").text(); text = tr.select(".title div").text(); } else { // e.g. Schaffhausen, BSB Mnchen title = tr.select(".title, .hitlistTitle").text(); text = tr.select(".results, .hitlistMetadata").first().ownText(); } // we need to do some evil javascript parsing here to get the cover // and loan status of the item // get cover if (tr.select(".cover script").size() > 0) { String js = tr.select(".cover script").first().html(); String isbn = matchJSVariable(js, "isbn"); String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (!"".equals(isbn) && !"".equals(ajaxUrl)) { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING); if (!"".equals(coverUrl)) { sr.setCover(coverUrl.replace("\r\n", "").trim()); } } } // get loan status and media ID if (tr.select("div[id^=loanstatus] + script").size() > 0) { String js = tr.select("div[id^=loanstatus] + script").first().html(); String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier", "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit", "context" }; String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (!"".equals(ajaxUrl)) { JSONObject id = new JSONObject(); 
List<NameValuePair> map = new ArrayList<>(); for (String variable : variables) { String value = matchJSVariable(js, variable); if (!"".equals(value)) { map.add(new BasicNameValuePair(variable, value)); } try { if (variable.equals("itemIdentifier")) { id.put("id", value); } else if (variable.equals("loanstateDBId")) { id.put("db", value); } } catch (JSONException e) { e.printStackTrace(); } } sr.setId(id.toString()); String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING) .replace("\r\n", "").trim(); Document loanStatusDoc = Jsoup.parse(loanStatusHtml); String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim(); if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung mglich") || loanstatus.contains("Keine Exemplare verfgbar"))) { sr.setStatus(SearchResult.Status.RED); } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) { sr.setStatus(SearchResult.Status.YELLOW); } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar")) || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar")) || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar")) || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar")) || (loanstatus.contains("heute zurckgebucht")) || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) { sr.setStatus(SearchResult.Status.GREEN); } if (sr.getType() != null) { if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) // Especially Onleihe.de ebooks are often marked // green though they are not available. 
{ sr.setStatus(SearchResult.Status.UNKNOWN); } } } } sr.setInnerhtml(("<b>" + title + "</b><br/>") + text); sr.setNr(10 * (page - 1) + i + 1); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }
From source file:de.geeksfactory.opacclient.apis.Zones.java
@Override public List<SearchField> getSearchFields() throws IOException { if (!initialised) start();// w w w .j a va 2s.co m List<SearchField> fields = new ArrayList<>(); String html = httpGet(opac_url + "/APS_ZONES?fn=AdvancedSearch&Style=Portal3&SubStyle=&Lang=GER" + "&ResponseEncoding=utf-8", getDefaultEncoding()); Document doc = Jsoup.parse(html); // find text fields Elements txt_opts = doc.select("#formSelectTerm_1 option"); for (Element opt : txt_opts) { TextSearchField field = new TextSearchField(); field.setId(opt.attr("value")); field.setHint(""); field.setDisplayName(opt.text()); fields.add(field); } // find filters String filtersQuery = version18 ? ".inSearchLimits .floatingBox" : ".TabRechAv .limitBlock"; Elements filters = doc.select(filtersQuery); int i = 0; for (Element filter : filters) { DropdownSearchField dropdown = new DropdownSearchField(); dropdown.addDropdownValue("", "Alle"); // All dropdowns use "q.limits.limit" as URL param, but they must not have the same ID dropdown.setId("dropdown_" + i); if (version18) { dropdown.setDisplayName(filter.select("tr").get(0).text().trim()); Elements opts = filter.select("tr").get(1).select("table td:has(input)"); for (Element opt : opts) { dropdown.addDropdownValue(opt.select("input").attr("value"), opt.text().trim()); } } else { dropdown.setDisplayName(filter.parent().previousElementSibling().text().trim()); Elements opts = filter.select(".limitChoice label"); for (Element opt : opts) { dropdown.addDropdownValue(opt.attr("for"), opt.text().trim()); } } fields.add(dropdown); i++; } return fields; }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
protected DetailledItem parse_result(String html) throws IOException { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url);//from w ww. ja v a 2 s.co m DetailledItem result = new DetailledItem(); if (doc.select("#cover script").size() > 0) { String js = doc.select("#cover script").first().html(); String isbn = matchJSVariable(js, "isbn"); String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (ajaxUrl == null) { ajaxUrl = matchJSParameter(js, "url"); } if (ajaxUrl != null && !"".equals(ajaxUrl)) { if (!"".equals(isbn) && isbn != null) { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=medium", ENCODING); if (!"".equals(coverUrl)) { result.setCover(coverUrl.replace("\r\n", "").trim()); } } else { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverJs = httpGet(url, ENCODING); result.setCover(matchHTMLAttr(coverJs, "src")); } } } result.setTitle(doc.select("h1").first().text()); for (Element tr : doc.select(".titleinfo tr")) { // Sometimes there is one th and one td, sometimes two tds String detailName = tr.select("th, td").first().text().trim(); String detailValue = tr.select("td").last().text().trim(); result.addDetail(new Detail(detailName, detailValue)); if (detailName.contains("ID in diesem Katalog")) { result.setId(detailValue); } } if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) { // e.g. 
Bayreuth_Uni String dname = ""; String dval = ""; boolean in_value = true; for (Node n : doc.select("#details").first().childNodes()) { if (n instanceof Element && ((Element) n).tagName().equals("strong")) { if (in_value) { if (dname.length() > 0 && dval.length() > 0) { result.addDetail(new Detail(dname, dval)); } dname = ((Element) n).text(); in_value = false; } else { dname += ((Element) n).text(); } } else { String t = null; if (n instanceof TextNode) { t = ((TextNode) n).text(); } else if (n instanceof Element) { t = ((Element) n).text(); } if (t != null) { if (in_value) { dval += t; } else { in_value = true; dval = t; } } } } } // Copies String copiesParameter = doc.select("div[id^=ajax_holdings_url").attr("ajaxParameter").replace("&", ""); if (!"".equals(copiesParameter)) { String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING); Document copiesDoc = Jsoup.parse(copiesHtml); List<String> table_keys = new ArrayList<>(); for (Element th : copiesDoc.select(".data tr th")) { if (th.text().contains("Zweigstelle")) { table_keys.add("branch"); } else if (th.text().contains("Status")) { table_keys.add("status"); } else if (th.text().contains("Signatur")) { table_keys.add("signature"); } else { table_keys.add(null); } } for (Element tr : copiesDoc.select(".data tr:has(td)")) { Copy copy = new Copy(); int i = 0; for (Element td : tr.select("td")) { if (table_keys.get(i) != null) { copy.set(table_keys.get(i), td.text().trim()); } i++; } result.addCopy(copy); } } // Reservation Info, only works if the code above could find a URL if (!"".equals(copiesParameter)) { String reservationParameter = copiesParameter.replace("showHoldings", "showDocument"); try { String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING); Document reservationDoc = Jsoup.parse(reservationHtml); reservationDoc.setBaseUri(opac_url); if (reservationDoc.select("a").size() == 1) { result.setReservable(true); 
result.setReservation_info(reservationDoc.select("a").first().attr("abs:href")); } } catch (Exception e) { e.printStackTrace(); // fail silently } } // TODO: Volumes try { Element isvolume = null; Map<String, String> volume = new HashMap<>(); Elements links = doc.select(".data td a"); int elcount = links.size(); for (int eli = 0; eli < elcount; eli++) { List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8"); for (NameValuePair nv : anyurl) { if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) { isvolume = links.get(eli); } else if (nv.getName().equals("catKey")) { volume.put("catKey", nv.getValue()); } else if (nv.getName().equals("dbIdentifier")) { volume.put("dbIdentifier", nv.getValue()); } } if (isvolume != null) { volume.put("volume", "true"); result.setVolumesearch(volume); break; } } } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
@Override public ProlongResult prolong(String a, Account account, int useraction, String selection) throws IOException { if (!initialised) { start();//from ww w. j av a 2 s . c om } if (System.currentTimeMillis() - logged_in > SESSION_LIFETIME || logged_in_as == null) { try { account(account); } catch (JSONException e) { e.printStackTrace(); return new ProlongResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT)); } catch (OpacErrorException e) { return new ProlongResult(MultiStepResult.Status.ERROR, e.getMessage()); } } else if (logged_in_as.getId() != account.getId()) { try { account(account); } catch (JSONException e) { e.printStackTrace(); return new ProlongResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT)); } catch (OpacErrorException e) { return new ProlongResult(MultiStepResult.Status.ERROR, e.getMessage()); } } if (useraction == MultiStepResult.ACTION_CONFIRMATION) { List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("target", "make_vl")); nameValuePairs.add(new BasicNameValuePair("verlaengern", "Besttigung")); httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); return new ProlongResult(MultiStepResult.Status.OK); } else { String html = httpGet(opac_url + "/" + a, getDefaultEncoding()); Document doc = Jsoup.parse(html); if (doc.getElementsByClass("kontomeldung").size() == 1) { return new ProlongResult(MultiStepResult.Status.ERROR, doc.getElementsByClass("kontomeldung").get(0).text()); } if (doc.select("#verlaengern").size() == 1) { if (doc.select(".kontozeile_center table").size() == 1) { Element table = doc.select(".kontozeile_center table").first(); ProlongResult res = new ProlongResult(MultiStepResult.Status.CONFIRMATION_NEEDED); List<String[]> details = new ArrayList<>(); for (Element row : table.select("tr")) { if (row.select(".konto_feld").size() == 1 && 
row.select(".konto_feldinhalt").size() == 1) { details.add(new String[] { row.select(".konto_feld").text().trim(), row.select(".konto_feldinhalt").text().trim() }); } } res.setDetails(details); return res; } else { List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("target", "make_vl")); nameValuePairs.add(new BasicNameValuePair("verlaengern", "Besttigung")); httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); return new ProlongResult(MultiStepResult.Status.OK); } } } return new ProlongResult(MultiStepResult.Status.ERROR, "??"); }
From source file:de.geeksfactory.opacclient.apis.IOpac.java
@Override public ReservationResult reservation(DetailledItem item, Account account, int useraction, String selection) throws IOException { String reservation_info = item.getReservation_info(); // STEP 1: Login page String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding()); Document doc = Jsoup.parse(html); if (doc.select("table").first().text().contains("kann nicht")) { return new ReservationResult(MultiStepResult.Status.ERROR, doc.select("table").first().text().trim()); }/*from w ww .j a va 2 s . c om*/ if (doc.select("form[name=form1]").size() == 0) { return new ReservationResult(MultiStepResult.Status.ERROR); } Element form = doc.select("form[name=form1]").first(); List<BasicNameValuePair> params = new ArrayList<>(); params.add(new BasicNameValuePair("sleKndNr", account.getName())); params.add(new BasicNameValuePair("slePw", account.getPassword())); params.add(new BasicNameValuePair("pshLogin", "Reservieren")); for (Element input : form.select("input[type=hidden]")) { params.add(new BasicNameValuePair(input.attr("name"), input.attr("value"))); } // STEP 2: Confirmation page html = httpPost(opac_url + "/cgi-bin/di.exe", new UrlEncodedFormEntity(params), getDefaultEncoding()); doc = Jsoup.parse(html); if (doc.select("form[name=form1]").size() > 0) { // STEP 3: There is another confirmation needed form = doc.select("form[name=form1]").first(); html = httpGet(opac_url + "/" + generateQuery(form), getDefaultEncoding()); doc = Jsoup.parse(html); } if (doc.text().contains("fehlgeschlagen") || doc.text().contains("Achtung") || doc.text().contains("nicht m")) { return new ReservationResult(MultiStepResult.Status.ERROR, doc.select("table").first().text().trim()); } else { return new ReservationResult(MultiStepResult.Status.OK); } }
From source file:us.colloquy.index.IndexHandler.java
/**
 * Collects document pointers for the entries listed in all ".ncx" files found below
 * {@code pathToLetters} (searched up to a depth of 6).
 *
 * <p>Each file is parsed with jsoup as UTF-8. The title is taken from the children of the
 * "docTitle" elements. For every "navPoint" element the "navLabel" text and the "src"
 * attribute of the "content" element are read; while the {@code startPrinting} flag is
 * set, a {@link DocumentPointer} built from the file's parent directory, the src (without
 * its "#..." fragment) and the title is appended to {@code documentPointers}.
 *
 * <p>I/O errors and any exception during parsing are printed and otherwise ignored.
 *
 * <p>NOTE(review): the string literals used to match the navigation labels consist only of
 * '?' characters here and one of them is split across a line break - presumably non-ASCII
 * text that was lost when this snippet was extracted. As written the patterns are not
 * usable; restore the literals from the original source before relying on this method.
 * NOTE(review): {@code "..".matches(navLabel)} uses the label as the regular expression;
 * this looks inverted - confirm against the original source.
 *
 * @param documentPointers output list that receives the pointers found
 * @param pathToLetters    root directory to search
 */
public void getURIForAllDiaries(List<DocumentPointer> documentPointers, Path pathToLetters) { List<Path> results = new ArrayList<>(); int maxDepth = 6; try (Stream<Path> stream = Files.find(pathToLetters, maxDepth, (path, attr) -> { return String.valueOf(path).endsWith(".ncx"); })) { stream.forEach(results::add); } catch (IOException e) { e.printStackTrace(); } System.out.println("files: " + results.size()); try { for (Path res : results) { Path parent = res.getParent(); // System.out.println("---------------------------------------------"); // System.out.println(parent.toString()); //use jsoup to list all files that contain something useful Document doc = Jsoup.parse(res.toFile(), "UTF-8"); String title = ""; for (Element element : doc.getElementsByTag("docTitle")) { //Letter letter = new Letter(); // StringBuilder content = new StringBuilder(); for (Element child : element.children()) { title = child.text(); // System.out.println("Title: " + title); } } // System.out.println("========================== " + res.toString() + " =========================="); boolean startPrinting = false; boolean newFile = true; for (Element element : doc.getElementsByTag("navPoint")) { //get nav label and content Element navLabelElement = element.select("navLabel").first(); Element srsElement = element.select("content").first(); String navLabel = ""; String srs = ""; if (navLabelElement != null) { navLabel = navLabelElement.text().replaceAll("\\*", "").trim(); } if (srsElement != null) { srs = srsElement.attr("src"); } if ("??".matches(navLabel)) { startPrinting = false; // System.out.println("----------------- end of file pointer ---------------"); } if (StringUtils.isNotEmpty(navLabel) && navLabel.matches("??.*|?? ?.*") && newFile) { newFile = false; startPrinting = true; } if (startPrinting && !navLabel .matches("(|??? 
??)")) { // System.out.println("----------------- file pointer ---------------"); // System.out.println(navLabel + "\t" + srs); DocumentPointer documentPointer = new DocumentPointer( parent.toString() + File.separator + srs.replaceAll("#.*", ""), title); documentPointers.add(documentPointer); } } // System.out.println("========================== END OF FILE =========================="); } } catch (Exception e) { e.printStackTrace(); } System.out.println("Size: " + documentPointers.size()); // for (DocumentPointer pointer : documentPointers) // { //parse and // System.out.println(pointer.getSourse() + "\t" + pointer.getUri()); }
From source file:de.geeksfactory.opacclient.apis.BiBer1992.java
@Override public List<SearchField> getSearchFields() throws IOException { List<SearchField> fields = new ArrayList<>(); HttpGet httpget;/*from www . ja va2s .co m*/ if (opacDir.contains("opax")) { httpget = new HttpGet(opacUrl + "/" + opacDir + "/de/qsel.html.S"); } else { httpget = new HttpGet(opacUrl + "/" + opacDir + "/de/qsel_main.S"); } HttpResponse response = http_client.execute(httpget); if (response.getStatusLine().getStatusCode() == 500) { throw new NotReachableException(response.getStatusLine().getReasonPhrase()); } String html = convertStreamToString(response.getEntity().getContent()); HttpUtils.consume(response.getEntity()); Document doc = Jsoup.parse(html); // get text fields Elements text_opts = doc.select("form select[name=REG1] option"); for (Element opt : text_opts) { TextSearchField field = new TextSearchField(); field.setId(opt.attr("value")); field.setDisplayName(opt.text()); field.setHint(""); fields.add(field); } // get media types Elements mt_opts = doc.select("form input[name~=(MT|MS)]"); if (mt_opts.size() > 0) { DropdownSearchField mtDropdown = new DropdownSearchField(); mtDropdown.setId(mt_opts.get(0).attr("name")); mtDropdown.setDisplayName("Medientyp"); for (Element opt : mt_opts) { if (!opt.val().equals("")) { String text = opt.text(); if (text.length() == 0) { // text is empty, check layouts: // Essen: <input name="MT"><img title="mediatype"> // Schaffenb: <input name="MT"><img alt="mediatype"> Element img = opt.nextElementSibling(); if (img != null && img.tagName().equals("img")) { text = img.attr("title"); if (text.equals("")) { text = img.attr("alt"); } } } if (text.length() == 0) { // text is still empty, check table layout, Example // Friedrichshafen // <td><input name="MT"></td> <td><img // title="mediatype"></td> Element td1 = opt.parent(); Element td2 = td1.nextElementSibling(); if (td2 != null) { Elements td2Children = td2.select("img[title]"); if (td2Children.size() > 0) { text = td2Children.get(0).attr("title"); } } } if 
(text.length() == 0) { // text is still empty, check images in label layout, Example // Wiedenst // <input type="radio" name="MT" id="MTYP1" value="MTYP1"> // <label for="MTYP1"><img src="http://www.wiedenest.de/bib/image/books // .png" alt="Bcher" title="Bcher"></label> Element label = opt.nextElementSibling(); if (label != null) { Elements td2Children = label.select("img[title]"); if (td2Children.size() > 0) { text = td2Children.get(0).attr("title"); } } } if (text.length() == 0) { // text is still empty: missing end tag like Offenburg text = parse_option_regex(opt); } mtDropdown.addDropdownValue(opt.val(), text); } } fields.add(mtDropdown); } // get branches Elements br_opts = doc.select("form select[name=ZW] option"); if (br_opts.size() > 0) { DropdownSearchField brDropdown = new DropdownSearchField(); brDropdown.setId(br_opts.get(0).parent().attr("name")); brDropdown.setDisplayName(br_opts.get(0).parent().parent().previousElementSibling().text() .replace("\u00a0", "").replace("?", "").trim()); for (Element opt : br_opts) { brDropdown.addDropdownValue(opt.val(), opt.text()); } fields.add(brDropdown); } return fields; }