List of usage examples for org.jsoup.nodes.Document#setBaseUri
public void setBaseUri(final String baseUri)
From source file:de.geeksfactory.opacclient.apis.Heidi.java
/**
 * Loads pending fees, lent items and reservations for the given account.
 *
 * <p>Logs in, fetches {@code konto.cgi}, reads the fee cell from {@code table.konto} and builds
 * one {@link LentItem} per row of {@code table.kontopos}. Reservations are gathered from the
 * {@code konto=v} and {@code konto=b} pages through {@code parse_reservations}.
 *
 * <p>Rows that cannot describe a loan (fewer than three cells, or no {@code <label>} in the
 * second cell) are skipped instead of aborting the whole account load.
 *
 * @param account the account to log in with and whose id is stored in the result
 * @return the collected account data
 * @throws IOException        declared by this method; presumably raised by the HTTP helpers
 * @throws JSONException      declared by this method
 * @throws OpacErrorException declared by this method
 */
@Override
public AccountData account(Account account) throws IOException, JSONException, OpacErrorException {
    login(account);

    AccountData adata = new AccountData(account.getId());
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    String html = httpGet(opac_url + "/konto.cgi?sess=" + sessid, getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    // Base URI is needed so that absUrl("href") below can resolve relative links.
    doc.setBaseUri(opac_url + "/");

    for (Element td : doc.select("table.konto td")) {
        if (td.text().contains("Offene")) {
            // "Geb.+hren" tolerates any encoding of the umlaut in "Gebuehren".
            String text = td.text().trim().replaceAll(
                    "Offene[^0-9]+Geb.+hren:[^0-9]+([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr.)", "$1 $2");
            adata.setPendingFees(text);
        }
    }

    List<LentItem> lent = new ArrayList<>();
    for (Element tr : doc.select("table.kontopos tr")) {
        if (tr.children().size() < 3) {
            // Not a loan row (child(1)/child(2) would throw).
            continue;
        }
        Element desc = tr.child(1).select("label").first();
        if (desc == null) {
            // No description label: nothing to build an item from.
            continue;
        }

        LentItem item = new LentItem();

        Element link = tr.child(1).select("a").first();
        if (link != null) {
            item.setId(getQueryParamsFirst(link.absUrl("href")).get("katkey"));
        }

        // The label's text nodes are, in order: author, title, then further lines
        // of which the one containing "Mediennummer" carries the barcode.
        int i = 0;
        for (Node node : desc.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (i == 0) {
                    item.setAuthor(text);
                } else if (i == 1) {
                    item.setTitle(text);
                } else if (text.contains("Mediennummer")) {
                    item.setBarcode(text.replace("Mediennummer: ", ""));
                }
                i++;
            }
        }

        Element firstCell = tr.child(0);
        if (firstCell.select("input").size() == 1) {
            item.setProlongData(firstCell.select("input").first().val());
            item.setRenewable(true);
        } else {
            Element span = firstCell.select("span").first();
            item.setProlongData(span != null ? "" + span.attr("class") : "");
            item.setRenewable(false);
        }

        // The date cell is either a single date or a "from - to" range; the deadline is the end.
        String todate = tr.child(2).text().trim();
        if (todate.contains("-")) {
            String[] datesplit = todate.split("-");
            if (datesplit.length > 1) {
                todate = datesplit[1].trim();
            }
        }
        if (todate.length() > 10) {
            todate = todate.substring(0, 10);
        }
        try {
            item.setDeadline(fmt.parseLocalDate(todate));
        } catch (IllegalArgumentException e) {
            // Unparseable date: keep the item, just without a deadline.
            e.printStackTrace();
        }
        lent.add(item);
    }
    adata.setLent(lent);

    List<ReservedItem> reservations = new ArrayList<>();
    html = httpGet(opac_url + "/konto.cgi?konto=v&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    html = httpGet(opac_url + "/konto.cgi?konto=b&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    adata.setReservations(reservations);

    return adata;
}
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
/**
 * Parses a detail page into a {@link DetailledItem}: cover, title, detail rows, copies,
 * volumes and reservation link.
 *
 * <p>Copy columns are mapped either from the {@code copiestable} entry of {@code data} or,
 * when that is absent, from the German column headers of the copies table.
 *
 * @param html the HTML of the detail page
 * @return the parsed item; sections that fail to parse are left out and the error is printed
 */
protected DetailledItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    if (doc.select(".detail_cover img").size() == 1) {
        result.setCover(doc.select(".detail_cover img").get(0).attr("src"));
    }

    result.setTitle(doc.select(".detail_titel").text());

    Elements detailtrs = doc.select(".detailzeile table tr");
    for (int i = 0; i < detailtrs.size(); i++) {
        Element tr = detailtrs.get(i);
        if (tr.children().size() < 2 || !tr.child(0).hasClass("detail_feld")) {
            // Need a label cell and a value cell.
            continue;
        }
        String title = tr.child(0).text();
        String content = tr.child(1).text();
        if (title.equals("Gesamtwerk:") || title.equals("Erschienen in:")) {
            // Link to the containing work: remember its media number as collection id.
            try {
                if (tr.child(1).select("a").size() > 0) {
                    Element link = tr.child(1).select("a").first();
                    List<NameValuePair> query = URLEncodedUtils.parse(new URI(link.absUrl("href")), "UTF-8");
                    for (NameValuePair q : query) {
                        if (q.getName().equals("MedienNr")) {
                            result.setCollectionId(q.getValue());
                        }
                    }
                }
            } catch (URISyntaxException e) {
                // Malformed link: the item simply has no collection id.
                e.printStackTrace();
            }
        } else {
            if (content.contains("hier klicken") && tr.child(1).select("a").size() > 0) {
                content += " " + tr.child(1).select("a").first().attr("href");
            }
            result.addDetail(new Detail(title, content));
        }
    }

    Elements detailcenterlinks = doc.select(".detailzeile_center a.detail_link");
    for (int i = 0; i < detailcenterlinks.size(); i++) {
        Element a = detailcenterlinks.get(i);
        result.addDetail(new Detail(a.text().trim(), a.absUrl("href")));
    }

    try {
        JSONObject copymap = new JSONObject();
        if (data.has("copiestable")) {
            copymap = data.getJSONObject("copiestable");
        } else {
            Elements ths = doc.select(".exemplartab .exemplarmenubar th");
            for (int i = 0; i < ths.size(); i++) {
                Element th = ths.get(i);
                String head = th.text().trim();
                if (head.equals("Zweigstelle")) {
                    copymap.put("branch", i);
                } else if (head.equals("Abteilung")) {
                    copymap.put("department", i);
                } else if (head.equals("Bereich") || head.equals("Standort")) {
                    copymap.put("location", i);
                } else if (head.equals("Signatur")) {
                    copymap.put("signature", i);
                } else if (head.equals("Barcode") || head.equals("Medien-Nummer")) {
                    copymap.put("barcode", i);
                } else if (head.equals("Status")) {
                    copymap.put("status", i);
                } else if (head.equals("Frist") || head.matches("Verf.+gbar")) {
                    copymap.put("returndate", i);
                } else if (head.equals("Vorbestellungen") || head.equals("Reservierungen")) {
                    copymap.put("reservations", i);
                }
            }
        }

        Elements exemplartrs = doc.select(".exemplartab .tabExemplar, .exemplartab .tabExemplar_");
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        for (int i = 0; i < exemplartrs.size(); i++) {
            Element tr = exemplartrs.get(i);
            int cellCount = tr.children().size();
            Copy copy = new Copy();

            Iterator<?> keys = copymap.keys();
            while (keys.hasNext()) {
                String key = (String) keys.next();
                int index = copymap.optInt(key, -1);
                // Skip columns this row does not have instead of aborting all remaining copies.
                if (index >= 0 && index < cellCount) {
                    try {
                        copy.set(key, tr.child(index).text(), fmt);
                    } catch (IllegalArgumentException e) {
                        e.printStackTrace();
                    }
                }
            }
            result.addCopy(copy);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    try {
        Elements bandtrs = doc.select("table .tabBand a");
        for (int i = 0; i < bandtrs.size(); i++) {
            Element tr = bandtrs.get(i);
            String[] hrefParts = tr.attr("href").split("=");
            if (hrefParts.length < 2) {
                // Link without an "=": no id to extract, skip only this volume.
                continue;
            }
            Volume volume = new Volume();
            volume.setId(hrefParts[1]);
            volume.setTitle(tr.text());
            result.addVolume(volume);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    if (doc.select(".detail_vorbest a").size() == 1) {
        result.setReservable(true);
        result.setReservation_info(doc.select(".detail_vorbest a").attr("href"));
    }
    return result;
}
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
@Override public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException { start();/* w ww . j a v a 2s .co m*/ LoginResponse login = login(acc); if (!login.success) { return null; } AccountData adata = new AccountData(acc.getId()); if (login.warning != null) { adata.setWarning(login.warning); } // Lent media httpGet(opac_url + "/userAccount.do?methodToCall=start", ENCODING); String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=loaned", ENCODING); List<LentItem> lent = new ArrayList<>(); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<LentItem> nextpageLent = parse_medialist(doc); if (nextpageLent != null) { lent.addAll(nextpageLent); } if (doc.select(".pagination").size() > 0 && lent != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageLent = parse_medialist(doc); if (nextpageLent != null) { lent.addAll(nextpageLent); } } } adata.setLent(lent); // Requested media ("Vormerkungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=requested", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<ReservedItem> requested = new ArrayList<>(); List<ReservedItem> nextpageRes = parse_reslist(doc); if (nextpageRes != null) { requested.addAll(nextpageRes); } if (doc.select(".pagination").size() > 0 && requested != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageRes = parse_reslist(doc); if (nextpageRes != null) { requested.addAll(nextpageRes); } } } // Ordered media ("Bestellungen") html = 
httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=ordered", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<ReservedItem> nextpageOrd = parse_reslist(doc); if (nextpageOrd != null) { requested.addAll(nextpageOrd); } if (doc.select(".pagination").size() > 0 && requested != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageOrd = parse_reslist(doc); if (nextpageOrd != null) { requested.addAll(nextpageOrd); } } } adata.setReservations(requested); // Fees if (doc.select("#fees").size() > 0) { String text = doc.select("#fees").first().text().trim(); if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) { text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2"); adata.setPendingFees(text); } } return adata; }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
protected DetailledItem parse_result(String html) throws IOException { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); DetailledItem result = new DetailledItem(); if (doc.select("#cover script").size() > 0) { String js = doc.select("#cover script").first().html(); String isbn = matchJSVariable(js, "isbn"); String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (ajaxUrl == null) { ajaxUrl = matchJSParameter(js, "url"); }/*from w w w. j av a2s. c om*/ if (ajaxUrl != null && !"".equals(ajaxUrl)) { if (!"".equals(isbn) && isbn != null) { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=medium", ENCODING); if (!"".equals(coverUrl)) { result.setCover(coverUrl.replace("\r\n", "").trim()); } } else { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverJs = httpGet(url, ENCODING); result.setCover(matchHTMLAttr(coverJs, "src")); } } } result.setTitle(doc.select("h1").first().text()); for (Element tr : doc.select(".titleinfo tr")) { // Sometimes there is one th and one td, sometimes two tds String detailName = tr.select("th, td").first().text().trim(); String detailValue = tr.select("td").last().text().trim(); result.addDetail(new Detail(detailName, detailValue)); if (detailName.contains("ID in diesem Katalog")) { result.setId(detailValue); } } if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) { // e.g. 
Bayreuth_Uni String dname = ""; String dval = ""; boolean in_value = true; for (Node n : doc.select("#details").first().childNodes()) { if (n instanceof Element && ((Element) n).tagName().equals("strong")) { if (in_value) { if (dname.length() > 0 && dval.length() > 0) { result.addDetail(new Detail(dname, dval)); } dname = ((Element) n).text(); in_value = false; } else { dname += ((Element) n).text(); } } else { String t = null; if (n instanceof TextNode) { t = ((TextNode) n).text(); } else if (n instanceof Element) { t = ((Element) n).text(); } if (t != null) { if (in_value) { dval += t; } else { in_value = true; dval = t; } } } } } // Copies String copiesParameter = doc.select("div[id^=ajax_holdings_url").attr("ajaxParameter").replace("&", ""); if (!"".equals(copiesParameter)) { String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING); Document copiesDoc = Jsoup.parse(copiesHtml); List<String> table_keys = new ArrayList<>(); for (Element th : copiesDoc.select(".data tr th")) { if (th.text().contains("Zweigstelle")) { table_keys.add("branch"); } else if (th.text().contains("Status")) { table_keys.add("status"); } else if (th.text().contains("Signatur")) { table_keys.add("signature"); } else { table_keys.add(null); } } for (Element tr : copiesDoc.select(".data tr:has(td)")) { Copy copy = new Copy(); int i = 0; for (Element td : tr.select("td")) { if (table_keys.get(i) != null) { copy.set(table_keys.get(i), td.text().trim()); } i++; } result.addCopy(copy); } } // Reservation Info, only works if the code above could find a URL if (!"".equals(copiesParameter)) { String reservationParameter = copiesParameter.replace("showHoldings", "showDocument"); try { String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING); Document reservationDoc = Jsoup.parse(reservationHtml); reservationDoc.setBaseUri(opac_url); if (reservationDoc.select("a").size() == 1) { result.setReservable(true); 
result.setReservation_info(reservationDoc.select("a").first().attr("abs:href")); } } catch (Exception e) { e.printStackTrace(); // fail silently } } // TODO: Volumes try { Element isvolume = null; Map<String, String> volume = new HashMap<>(); Elements links = doc.select(".data td a"); int elcount = links.size(); for (int eli = 0; eli < elcount; eli++) { List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8"); for (NameValuePair nv : anyurl) { if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) { isvolume = links.get(eli); } else if (nv.getName().equals("catKey")) { volume.put("catKey", nv.getValue()); } else if (nv.getName().equals("dbIdentifier")) { volume.put("dbIdentifier", nv.getValue()); } } if (isvolume != null) { volume.put("volume", "true"); result.setVolumesearch(volume); break; } } } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
/**
 * Parses the rows of the {@code .data} table into reservations and appends them to
 * {@code reservations}.
 *
 * <p>The first row is skipped. Parsing stops at the first row that contains "keine Daten" or
 * has fewer than two cells.
 *
 * @param type         value placed in front of each item's cancel data
 * @param reservations list to append to; it may already hold items from earlier calls
 * @param doc          the account page; its base URI is set to {@code opac_url}
 * @param offset       value placed second in each item's cancel data
 */
protected void parse_reslist(String type, List<ReservedItem> reservations, Document doc, int offset) {
    doc.setBaseUri(opac_url);
    Elements copytrs = doc.select(".data tr");
    int trs = copytrs.size();
    if (trs == 1) {
        return;
    }
    assert (trs > 0);

    // The list is shared across calls, so the sanity check below must count only what
    // this call added.
    final int sizeBefore = reservations.size();

    for (int i = 1; i < trs; i++) {
        Element tr = copytrs.get(i);
        if (tr.text().contains("keine Daten") || tr.children().size() < 2) {
            return;
        }

        ReservedItem item = new ReservedItem();
        item.setTitle(tr.child(1).select("strong").text().trim());
        try {
            String[] rowsplit1 = tr.child(1).html().split("<br[ /]*>");
            String[] rowsplit2 = tr.child(2).html().split("<br[ /]*>");
            if (rowsplit1.length > 1) {
                item.setAuthor(rowsplit1[1].trim());
            }
            // NOTE(review): the status (first line) is only set when a third line exists;
            // kept as is - confirm whether "> 0" was intended.
            if (rowsplit2.length > 2) {
                item.setBranch(rowsplit2[2].trim());
                item.setStatus(rowsplit2[0].trim());
            }
            if (tr.select("a").size() == 1) {
                item.setCancelData(
                        type + "$" + offset + "$" + tr.select("a").attr("abs:href").split("\\?")[1]);
            }
        } catch (Exception e) {
            // Best effort: keep the item with whatever could be read.
            e.printStackTrace();
        }
        reservations.add(item);
    }
    assert (reservations.size() - sizeBefore == trs - 1);
}
From source file:de.geeksfactory.opacclient.apis.SISIS.java
/**
 * Parses the rows of the {@code .data} table into lent items and appends them to
 * {@code media}.
 *
 * <p>The first row is skipped. Parsing stops at the first row that contains "keine Daten" or
 * has fewer than two cells.
 *
 * @param media  list to append to; it may already hold items from earlier calls
 * @param doc    the account page; its base URI is set to {@code opac_url}
 * @param offset value placed in front of each renewable item's prolong data
 */
protected void parse_medialist(List<LentItem> media, Document doc, int offset) {
    doc.setBaseUri(opac_url);
    Elements copytrs = doc.select(".data tr");
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    int trs = copytrs.size();
    if (trs == 1) {
        return;
    }
    assert (trs > 0);

    // The list is shared across calls, so the sanity check below must count only what
    // this call added.
    final int sizeBefore = media.size();

    for (int i = 1; i < trs; i++) {
        Element tr = copytrs.get(i);
        if (tr.text().contains("keine Daten") || tr.children().size() < 2) {
            return;
        }

        LentItem item = new LentItem();
        item.setTitle(tr.child(1).select("strong").text().trim());
        try {
            // A missing author line must not prevent the deadline from being read.
            String[] col1split = tr.child(1).html().split("<br[ /]*>");
            if (col1split.length > 1) {
                item.setAuthor(col1split[1].trim());
            }

            String[] col2split = tr.child(2).html().split("<br[ /]*>");
            String deadline = col2split[0].trim();
            if (deadline.contains("-")) {
                // "from - to" range: the deadline is the end
                deadline = deadline.split("-")[1].trim();
            }
            try {
                item.setDeadline(fmt.parseLocalDate(deadline).toString());
            } catch (IllegalArgumentException e1) {
                e1.printStackTrace();
            }
            if (col2split.length > 1) {
                item.setHomeBranch(col2split[1].trim());
            }

            if (tr.select("a").size() > 0) {
                for (Element link : tr.select("a")) {
                    String href = link.attr("abs:href");
                    Map<String, String> hrefq = getQueryParamsFirst(href);
                    // Links without a methodToCall parameter are simply not renewal links.
                    if (hrefq != null && "renewalPossible".equals(hrefq.get("methodToCall"))) {
                        item.setProlongData(offset + "$" + href.split("\\?")[1]);
                        item.setRenewable(true);
                        break;
                    }
                }
            } else if (tr.select(".textrot, .textgruen, .textdunkelblau").size() > 0) {
                item.setProlongData("" + tr.select(".textrot, .textgruen, .textdunkelblau").text());
                item.setRenewable(false);
            }
        } catch (Exception ex) {
            // Best effort: keep the item with whatever could be read.
            ex.printStackTrace();
        }
        media.add(item);
    }
    assert (media.size() - sizeBefore == trs - 1);
}
From source file:de.geeksfactory.opacclient.apis.SISIS.java
@Override public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException { start(); // TODO: Is this necessary? int resultNum; if (!login(acc)) { return null; }//from www . j a v a2 s . c om // Geliehene Medien String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=1", ENCODING); List<LentItem> medien = new ArrayList<>(); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_medialist(medien, doc, 1); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { continue; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_medialist(medien, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } if (doc.select("#label1").size() > 0) { resultNum = 0; String rNum = doc.select("#label1").first().text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum = Integer.parseInt(rNum); } assert (resultNum == medien.size()); } // Ordered media ("Bestellungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=6", ENCODING); List<ReservedItem> reserved = new ArrayList<>(); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_reslist("6", reserved, doc, 1); Elements label6 = doc.select("#label6"); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { break; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_reslist("6", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } // Prebooked 
media ("Vormerkungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=7", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_reslist("7", reserved, doc, 1); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { break; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_reslist("7", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } if (label6.size() > 0 && doc.select("#label7").size() > 0) { resultNum = 0; String rNum = label6.text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum = Integer.parseInt(rNum); } rNum = doc.select("#label7").text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum += Integer.parseInt(rNum); } assert (resultNum == reserved.size()); } AccountData res = new AccountData(acc.getId()); if (doc.select("#label8").size() > 0) { String text = doc.select("#label8").first().text().trim(); if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) { text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2"); res.setPendingFees(text); } } Pattern p = Pattern.compile("[^0-9.]*", Pattern.MULTILINE); if (doc.select(".box3").size() > 0) { for (Element box : doc.select(".box3")) { if (box.select("strong").size() == 1) { String text = box.select("strong").text(); if (text.equals("Jahresgebhren")) { text = box.text(); text = p.matcher(text).replaceAll(""); res.setValidUntil(text); } } } } res.setLent(medien); res.setReservations(reserved); return res; }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException { Document doc = Jsoup.parse(html); if (doc.select("#RefineHitListForm").size() > 0) { // the results are located on a different page loaded via AJAX html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000) + "&hitlistindex=0&exclusionList=", ENCODING); doc = Jsoup.parse(html);/* w ww . jav a 2 s. c om*/ } if (doc.select(".nodata").size() > 0) { return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); } doc.setBaseUri(opac_url + "/searchfoo"); int results_total = -1; String resultnumstr = doc.select(".box-header h2").first().text(); if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) { reusehtml = html; throw new OpacErrorException("is_a_redirect"); } else if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } Elements table = doc.select("table.data > tbody > tr"); identifier = null; Elements links = doc.select("table.data a"); boolean haslink = false; for (Element node : links) { if (node.hasAttr("href") & node.attr("href").contains("singleHit.do") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils .parse(new URI(node.attr("href").replace(" ", "%20").replace("&", "&")), ENCODING); for (NameValuePair nv : anyurl) { if (nv.getName().equals("identifier")) { identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } List<SearchResult> results = new ArrayList<>(); for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select(".icn, img[width=32]").size() > 0) { String[] fparts = tr.select(".icn, img[width=32]").first().attr("src").split("/"); String fname = fparts[fparts.length - 1]; 
String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "") .replace(".png", ""); // File names can look like this: "20_DVD_Video.gif" Pattern pattern = Pattern.compile("(\\d+)_.*"); Matcher matcher = pattern.matcher(changedFname); if (matcher.find()) { changedFname = matcher.group(1); } MediaType defaulttype = defaulttypes.get(changedFname); if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttype); } } else { sr.setType(defaulttype); } } String title; String text; if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen title = tr.select(".title a").text(); text = tr.select(".title div").text(); } else { // e.g. Schaffhausen, BSB Mnchen title = tr.select(".title, .hitlistTitle").text(); text = tr.select(".results, .hitlistMetadata").first().ownText(); } // we need to do some evil javascript parsing here to get the cover // and loan status of the item // get cover if (tr.select(".cover script").size() > 0) { String js = tr.select(".cover script").first().html(); String isbn = matchJSVariable(js, "isbn"); String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (!"".equals(isbn) && !"".equals(ajaxUrl)) { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING); if (!"".equals(coverUrl)) { sr.setCover(coverUrl.replace("\r\n", "").trim()); } } } // get loan status and media ID if (tr.select("div[id^=loanstatus] + script").size() > 0) { String js = tr.select("div[id^=loanstatus] + script").first().html(); String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier", "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit", "context" }; String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (!"".equals(ajaxUrl)) { JSONObject id = new JSONObject(); 
List<NameValuePair> map = new ArrayList<>(); for (String variable : variables) { String value = matchJSVariable(js, variable); if (!"".equals(value)) { map.add(new BasicNameValuePair(variable, value)); } try { if (variable.equals("itemIdentifier")) { id.put("id", value); } else if (variable.equals("loanstateDBId")) { id.put("db", value); } } catch (JSONException e) { e.printStackTrace(); } } sr.setId(id.toString()); String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING) .replace("\r\n", "").trim(); Document loanStatusDoc = Jsoup.parse(loanStatusHtml); String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim(); if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung mglich") || loanstatus.contains("Keine Exemplare verfgbar"))) { sr.setStatus(SearchResult.Status.RED); } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) { sr.setStatus(SearchResult.Status.YELLOW); } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar")) || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar")) || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar")) || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar")) || (loanstatus.contains("heute zurckgebucht")) || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) { sr.setStatus(SearchResult.Status.GREEN); } if (sr.getType() != null) { if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) // Especially Onleihe.de ebooks are often marked // green though they are not available. 
{ sr.setStatus(SearchResult.Status.UNKNOWN); } } } } sr.setInnerhtml(("<b>" + title + "</b><br/>") + text); sr.setNr(10 * (page - 1) + i + 1); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url + "/searchfoo"); if (doc.select(".error").size() > 0) { throw new OpacErrorException(doc.select(".error").text().trim()); } else if (doc.select(".nohits").size() > 0) { throw new OpacErrorException(doc.select(".nohits").text().trim()); } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) { return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); }// w ww. j a v a 2 s . co m int results_total = -1; String resultnumstr = doc.select(".box-header h2").first().text(); if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) { reusehtml = html; throw new OpacErrorException("is_a_redirect"); } else if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } Elements table = doc.select("table.data tbody tr"); identifier = null; Elements links = doc.select("table.data a"); boolean haslink = false; for (int i = 0; i < links.size(); i++) { Element node = links.get(i); if (node.hasAttr("href") & node.attr("href").contains("singleHit.do") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils .parse(new URI(node.attr("href").replace(" ", "%20").replace("&", "&")), ENCODING); for (NameValuePair nv : anyurl) { if (nv.getName().equals("identifier")) { identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } List<SearchResult> results = new ArrayList<>(); for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select("td img[title]").size() > 0) { String title = tr.select("td img").get(0).attr("title"); String[] fparts = tr.select("td img").get(0).attr("src").split("/"); String 
fname = fparts[fparts.length - 1]; MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", "")); MediaType default_by_title = defaulttypes.get(title); MediaType default_name = default_by_title != null ? default_by_title : default_by_fname; if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(default_name); } } else { sr.setType(default_name); } } String alltext = tr.text(); if (alltext.contains("eAudio") || alltext.contains("eMusic")) { sr.setType(MediaType.MP3); } else if (alltext.contains("eVideo")) { sr.setType(MediaType.EVIDEO); } else if (alltext.contains("eBook")) { sr.setType(MediaType.EBOOK); } else if (alltext.contains("Munzinger")) { sr.setType(MediaType.EDOC); } if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) { sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src")); if (sr.getCover().contains("showCover.do")) { downloadCover(sr); } } Element middlething; if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) { middlething = tr.child(2); } else { middlething = tr.child(1); } List<Node> children = middlething.childNodes(); if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) { Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first(); if (indiv.children().size() > 1) { children = indiv.childNodes(); } } else if (middlething.select("span.titleData").size() == 1) { children = middlething.select("span.titleData").first().childNodes(); } int childrennum = children.size(); List<String[]> strings = new ArrayList<>(); for (int ch = 0; ch < childrennum; ch++) { Node node = children.get(ch); if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (text.length() > 3) { strings.add(new String[] { "text", "", 
text }); } } else if (node instanceof Element) { List<Node> subchildren = node.childNodes(); for (int j = 0; j < subchildren.size(); j++) { Node subnode = subchildren.get(j); if (subnode instanceof TextNode) { String text = ((TextNode) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") }); } } else if (subnode instanceof Element) { String text = ((Element) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") }); } } } } } StringBuilder description = null; if (tr.select("span.Z3988").size() == 1) { // Sometimes there is a <span class="Z3988"> item which provides // data in a standardized format. List<NameValuePair> z3988data; boolean hastitle = false; try { description = new StringBuilder(); z3988data = URLEncodedUtils .parse(new URI("http://dummy/?" 
+ tr.select("span.Z3988").attr("title")), "UTF-8"); for (NameValuePair nv : z3988data) { if (nv.getValue() != null) { if (!nv.getValue().trim().equals("")) { if (nv.getName().equals("rft.btitle") && !hastitle) { description.append("<b>").append(nv.getValue()).append("</b>"); hastitle = true; } else if (nv.getName().equals("rft.atitle") && !hastitle) { description.append("<b>").append(nv.getValue()).append("</b>"); hastitle = true; } else if (nv.getName().equals("rft.au")) { description.append("<br />").append(nv.getValue()); } else if (nv.getName().equals("rft.date")) { description.append("<br />").append(nv.getValue()); } } } } } catch (URISyntaxException e) { description = null; } } boolean described = false; if (description != null && description.length() > 0) { sr.setInnerhtml(description.toString()); described = true; } else { description = new StringBuilder(); } int k = 0; boolean yearfound = false; boolean titlefound = false; boolean sigfound = false; for (String[] part : strings) { if (!described) { if (part[0].equals("a") && (k == 0 || !titlefound)) { if (k != 0) { description.append("<br />"); } description.append("<b>").append(part[2]).append("</b>"); titlefound = true; } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) { yearfound = true; if (k != 0) { description.append("<br />"); } description.append(part[2]); } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) { if (k != 0) { description.append("<br />"); } description.append(part[2]); } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) { if (k != 0) { description.append("<br />"); } description.append(part[2]); } else if (k > 1 && k < 4 && !sigfound && part[0].equals("text") && part[2].matches("^[A-Za-z0-9,\\- ]+$")) { description.append("<br />"); description.append(part[2]); } } if (part.length == 4) { if (part[0].equals("span") && part[3].equals("textgruen")) { sr.setStatus(SearchResult.Status.GREEN); } else if 
(part[0].equals("span") && part[3].equals("textrot")) { sr.setStatus(SearchResult.Status.RED); } } else if (part.length == 5) { if (part[4].contains("purple")) { sr.setStatus(SearchResult.Status.YELLOW); } } if (sr.getStatus() == null) { if ((part[2].contains("entliehen") && part[2].startsWith("Vormerkung ist leider nicht mglich")) || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) { sr.setStatus(SearchResult.Status.RED); } else if (part[2].startsWith("entliehen") || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) { sr.setStatus(SearchResult.Status.YELLOW); } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar")) || (part[2].contains("heute zurckgebucht")) || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) { sr.setStatus(SearchResult.Status.GREEN); } if (sr.getType() != null) { if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) // Especially Onleihe.de ebooks are often marked // green though they are not available. { sr.setStatus(SearchResult.Status.UNKNOWN); } } } k++; } if (!described) { sr.setInnerhtml(description.toString()); } sr.setNr(10 * (page - 1) + i); sr.setId(null); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
protected DetailledItem parse_result(String html) throws IOException { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING); Document doc2 = Jsoup.parse(html2); doc2.setBaseUri(opac_url);/*from ww w . j a v a2s. c om*/ String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive", ENCODING); Document doc3 = Jsoup.parse(html3); doc3.setBaseUri(opac_url); DetailledItem result = new DetailledItem(); try { result.setId(doc.select("#bibtip_id").text().trim()); } catch (Exception ex) { ex.printStackTrace(); } List<String> reservationlinks = new ArrayList<>(); for (Element link : doc3.select("#vormerkung a, #tab-content a")) { String href = link.absUrl("href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (result.getId() == null) { // ID retrieval String key = hrefq.get("katkey"); if (key != null) { result.setId(key); break; } } // Vormerken if (hrefq.get("methodToCall") != null) { if (hrefq.get("methodToCall").equals("doVormerkung") || hrefq.get("methodToCall").equals("doBestellung")) { reservationlinks.add(href.split("\\?")[1]); } } } if (reservationlinks.size() == 1) { result.setReservable(true); result.setReservation_info(reservationlinks.get(0)); } else if (reservationlinks.size() == 0) { result.setReservable(false); } else { // TODO: Multiple options - handle this case! 
} if (doc.select(".data td img").size() == 1) { result.setCover(doc.select(".data td img").first().attr("abs:src")); try { downloadCover(result); } catch (Exception e) { } } if (doc.select(".aw_teaser_title").size() == 1) { result.setTitle(doc.select(".aw_teaser_title").first().text().trim()); } else if (doc.select(".data td strong").size() > 0) { result.setTitle(doc.select(".data td strong").first().text().trim()); } else { result.setTitle(""); } if (doc.select(".aw_teaser_title_zusatz").size() > 0) { result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim())); } String title = ""; String text = ""; boolean takeover = false; Element detailtrs = doc2.select(".box-container .data td").first(); for (Node node : detailtrs.childNodes()) { if (node instanceof Element) { if (((Element) node).tagName().equals("strong")) { title = ((Element) node).text().trim(); text = ""; } else { if (((Element) node).tagName().equals("a") && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) { text = text + node.attr("href"); takeover = true; break; } } } else if (node instanceof TextNode) { text = text + ((TextNode) node).text(); } } if (!takeover) { text = ""; title = ""; } detailtrs = doc2.select("#tab-content .data td").first(); if (detailtrs != null) { for (Node node : detailtrs.childNodes()) { if (node instanceof Element) { if (((Element) node).tagName().equals("strong")) { if (!text.equals("") && !title.equals("")) { result.addDetail(new Detail(title.trim(), text.trim())); if (title.equals("Titel:")) { result.setTitle(text.trim()); } text = ""; } title = ((Element) node).text().trim(); } else { if (((Element) node).tagName().equals("a") && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) { text = text + node.attr("href"); } else { text = text + ((Element) node).text(); } } } else if (node instanceof TextNode) { text = text + ((TextNode) node).text(); } } } else { if 
(doc2.select("#tab-content .fulltitle tr").size() > 0) { Elements rows = doc2.select("#tab-content .fulltitle tr"); for (Element tr : rows) { if (tr.children().size() == 2) { Element valcell = tr.child(1); String value = valcell.text().trim(); if (valcell.select("a").size() == 1) { value = valcell.select("a").first().absUrl("href"); } result.addDetail(new Detail(tr.child(0).text().trim(), value)); } } } else { result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR), stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL))); } } if (!text.equals("") && !title.equals("")) { result.addDetail(new Detail(title.trim(), text.trim())); if (title.equals("Titel:")) { result.setTitle(text.trim()); } } for (Element link : doc3.select("#tab-content a")) { Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href")); if (result.getId() == null) { // ID retrieval String key = hrefq.get("katkey"); if (key != null) { result.setId(key); break; } } } for (Element link : doc3.select(".box-container a")) { if (link.text().trim().equals("Download")) { result.addDetail( new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href"))); } } Map<String, Integer> copy_columnmap = new HashMap<>(); // Default values copy_columnmap.put("barcode", 1); copy_columnmap.put("branch", 3); copy_columnmap.put("status", 4); Elements copy_columns = doc.select("#tab-content .data tr#bg2 th"); for (int i = 0; i < copy_columns.size(); i++) { Element th = copy_columns.get(i); String head = th.text().trim(); if (head.contains("Status")) { copy_columnmap.put("status", i); } if (head.contains("Zweigstelle")) { copy_columnmap.put("branch", i); } if (head.contains("Mediennummer")) { copy_columnmap.put("barcode", i); } if (head.contains("Standort")) { copy_columnmap.put("location", i); } if (head.contains("Signatur")) { copy_columnmap.put("signature", i); } } Pattern status_lent = Pattern.compile( "^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) 
\\(gesamte Vormerkungen: ([0-9]+)\\)$"); Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$"); Elements exemplartrs = doc.select("#tab-content .data tr").not("#bg2"); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); for (Element tr : exemplartrs) { try { Copy copy = new Copy(); Element status = tr.child(copy_columnmap.get("status")); Element barcode = tr.child(copy_columnmap.get("barcode")); String barcodetext = barcode.text().trim().replace(" Wegweiser", ""); // STATUS String statustext; if (status.getElementsByTag("b").size() > 0) { statustext = status.getElementsByTag("b").text().trim(); } else { statustext = status.text().trim(); } if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) { Matcher matcher1 = status_and_barcode.matcher(statustext); if (matcher1.matches()) { statustext = matcher1.group(1); barcodetext = matcher1.group(2); } } Matcher matcher = status_lent.matcher(statustext); if (matcher.matches()) { copy.setStatus(matcher.group(1)); copy.setReservations(matcher.group(3)); copy.setReturnDate(fmt.parseLocalDate(matcher.group(2))); } else { copy.setStatus(statustext); } copy.setBarcode(barcodetext); if (status.select("a[href*=doVormerkung]").size() == 1) { copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]); } String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", ""); copy.setBranch(branchtext); if (copy_columnmap.containsKey("location")) { copy.setLocation( tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", "")); } if (copy_columnmap.containsKey("signature")) { copy.setShelfmark( tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", "")); } result.addCopy(copy); } catch (Exception ex) { ex.printStackTrace(); } } try { Element isvolume = null; Map<String, String> volume = new HashMap<>(); Elements links = doc.select(".data td a"); int elcount = 
links.size(); for (int eli = 0; eli < elcount; eli++) { List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8"); for (NameValuePair nv : anyurl) { if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) { isvolume = links.get(eli); } else if (nv.getName().equals("catKey")) { volume.put("catKey", nv.getValue()); } else if (nv.getName().equals("dbIdentifier")) { volume.put("dbIdentifier", nv.getValue()); } } if (isvolume != null) { volume.put("volume", "true"); result.setVolumesearch(volume); break; } } } catch (Exception e) { e.printStackTrace(); } return result; }