List of usage examples for org.jsoup.nodes Element hasAttr
public boolean hasAttr(String attributeKey)
From source file:gov.medicaid.screening.dao.impl.NursingLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria.//from w w w . ja v a 2 s . c o m * @param byName flag indicating it is a name search * @return the search result for licenses * * @throws URISyntaxException if an error occurs while building the URL. * @throws ClientProtocolException if client does not support protocol used. * @throws IOException if an error occurs while parsing response. * @throws ParseException if an error occurs while parsing response. * @throws ServiceException for any other problems encountered */ private SearchResult<License> getAllResults(NursingLicenseSearchCriteria criteria, boolean byName) throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager()); client.setRedirectStrategy(new LaxRedirectStrategy()); client.setCookieStore(loginAsPublicUser()); HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getSearch); verifyAndAuditCall(getSearchURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build()); List<License> allLicenses = new ArrayList<License>(); // switch to search by name screen if (byName) { HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", "R" }, { "_ctl7:rbtnSearch", "2" }, { "_ctl7:txtCheckDigit", "" }, { "_ctl7:txtLicenseNumber", "" }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); // get the data grid entries if (page.select("table#_ctl7_grdSearchResults").size() < 1) 
{ throw new ParsingException(ErrorCode.MITA50002.getDesc()); } Elements rows = page.select(GRID_ROW_SELECTOR); while (rows.size() > 0) { for (Element row : rows) { String url = row.select("a").first().attr("href"); String licenseNo = row.select("td:eq(4)").text(); HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url)); response = client.execute(getDetail); verifyAndAuditCall(getSearchURL(), response); Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity())); allLicenses.add(parseLicense(licenseDetails, licenseNo.substring(0, 1))); } rows.clear(); // check for next page Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first(); if (getLog() != null) { getLog().log(Level.DEBUG, "Current page is: " + currentPage.text()); } Element pageLink = currentPage.nextElementSibling(); if (pageLink != null && pageLink.hasAttr("href")) { if (getLog() != null) { getLog().log(Level.DEBUG, "There are more results, getting the next page."); } String target = parseEventTarget(pageLink.attr("href")); entity = getResultPage(criteria, client, page, search, target, getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); rows = page.select(GRID_ROW_SELECTOR); } } } else { // search by license number (site supports only exact match) HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) }, { "_ctl7:rbtnSearch", "1" }, { "_ctl7:txtCheckDigit", Util.defaultString(criteria.getCheckDigit()) }, { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); if (page.select("span#lblFormTitle").text().equals("License Details")) { String prefLicenseType = criteria.getLicenseType().getName(); 
allLicenses.add(parseLicense(page, prefLicenseType)); } } SearchResult<License> searchResult = new SearchResult<License>(); searchResult.setItems(allLicenses); return searchResult; }
From source file:io.apiman.tools.i18n.TemplateScanner.java
/** * Scan the given html template using jsoup and find all strings that require translation. This is * done by finding all elements with a "apiman-i18n-key" attribute. * @param file/* w w w. j a v a 2 s .c o m*/ * @param strings * @throws IOException */ private static void scanFile(File file, TreeMap<String, String> strings) throws IOException { Document doc = Jsoup.parse(file, "UTF-8"); // First, scan for elements with the 'apiman-i18n-key' attribute. These require translating. Elements elements = doc.select("*[apiman-i18n-key]"); for (Element element : elements) { String i18nKey = element.attr("apiman-i18n-key"); boolean isNecessary = false; // Process the element text (if the element has no children) if (strings.containsKey(i18nKey)) { if (hasNoChildren(element)) { isNecessary = true; String elementVal = element.text(); if (elementVal.trim().length() > 0 && !elementVal.contains("{{")) { String currentValue = strings.get(i18nKey); if (!currentValue.equals(elementVal)) { throw new IOException("Duplicate i18n key found with different default values. Key=" + i18nKey + " Value1=" + elementVal + " Value2=" + currentValue); } } } } else { if (hasNoChildren(element)) { String elementVal = element.text(); if (elementVal.trim().length() > 0 && !elementVal.contains("{{")) { isNecessary = true; strings.put(i18nKey, elementVal); } } } // Process the translatable attributes for (String tattr : TRANSLATABLE_ATTRIBUTES) { if (element.hasAttr(tattr)) { String attrValue = element.attr(tattr); if (attrValue.contains("{{")) { continue; } String attrI18nKey = i18nKey + '.' + tattr; String currentAttrValue = strings.get(attrI18nKey); if (currentAttrValue == null) { isNecessary = true; strings.put(attrI18nKey, attrValue); } else if (!currentAttrValue.equals(attrValue)) { throw new IOException( "Duplicate i18n key found with different default values (for attribute '" + tattr + "'). 
Key=" + attrI18nKey + " Value1=" + attrValue + " Value2=" + currentAttrValue); } else { isNecessary = true; } } } if (!isNecessary) { throw new IOException("Detected an unnecessary apiman-i18n-key attribute in file '" + file.getName() + "' on element: " + element); } } // Next, scan all elements to see if the element *should* be marked for translation elements = doc.select("*"); for (Element element : elements) { if (element.hasAttr("apiman-i18n-key") || element.hasAttr("apiman-i18n-skip")) { continue; } if (hasNoChildren(element)) { String value = element.text(); if (value != null && value.trim().length() > 0) { if (!value.contains("{{")) { throw new IOException("Found an element in '" + file.getName() + "' that should be translated: " + element); } } } } // Next scan elements with a translatable attribute and fail if any of those elements // are missing the apiman-i18n-key attribute. for (String tattr : TRANSLATABLE_ATTRIBUTES) { elements = doc.select("*[" + tattr + "]"); for (Element element : elements) { if (element.hasAttr("apiman-i18n-key") || element.hasAttr("apiman-i18n-skip") || element.attr(tattr).contains("{{")) { continue; } else { throw new IOException("In template '" + file.getName() + "', found an element with a '" + tattr + "' attribute but missing 'apiman-i18n-key': " + element); } } } }
From source file:com.lumata.lib.lupa.extractor.internal.HtmlBiggestImageExtractor.java
@Override public Image extractBestImage(URL sourceUrl, Elements htmlSection, ImageExtractionRequirements requirements) { Map<String, Image> imagesToExplore = new HashMap<String, Image>(); Set<ImageDownloadTask> imagesToDownload = new HashSet<ImageDownloadTask>(); Iterator<org.jsoup.nodes.Element> it = htmlSection.iterator(); // collect valid images while (it.hasNext() && imagesToExplore.size() < requirements.getMaxImagesToExplore()) { Element imageElement = it.next(); String imageUrl = imageElement.absUrl("src"); // Do not process empty img tags, duplicated images or tracking // pixels and other assorted ads if (imageUrl == null || imagesToExplore.containsKey(imageUrl) || isTrackingPixelOrAd(imageUrl)) { continue; }// ww w. j av a 2 s. c om // remember this image Image imageContent = new Image(imageUrl); if (imageElement.hasAttr(WIDTH_ATTRIBUTE)) { // TODO: We need to convert other picture size units supported by html (there must be a lib for this) imageContent.setWidth(Integer.parseInt(imageElement.attr(WIDTH_ATTRIBUTE).replace("px", ""))); } if (imageElement.hasAttr(HEIGHT_ATTRIBUTE)) { imageContent.setHeight(Integer.parseInt(imageElement.attr(HEIGHT_ATTRIBUTE).replace("px", ""))); } if (imageContent.getWidth() == null || imageContent.getHeight() == null) {// mark image to download imagesToDownload.add(new ImageDownloadTask(imageContent)); } imagesToExplore.put(imageUrl, imageContent); } // if dimensions are empty -> download image if (CollectionUtils.isNotEmpty(imagesToDownload)) { try { ExecutorService pool = Executors.newFixedThreadPool(imagesToDownload.size(), getThreadFactory(sourceUrl)); pool.invokeAll(imagesToDownload); pool.shutdown(); } catch (InterruptedException e) { LOG.error("InterruptedException while downloading images", e); } } // select biggest image Image biggestImage = null; try { biggestImage = Collections.max(imagesToExplore.values(), new Comparator<Image>() { @Override public int compare(Image o1, Image o2) { return getSquarePixels(o1) - 
getSquarePixels(o2); } }); } catch (NoSuchElementException e) { return null; } // if image is too small, discard return (biggestImage.getWidth() < requirements.getMinImageSize() || biggestImage.getHeight() < requirements.getMinImageSize()) ? null : biggestImage; }
From source file:com.zacwolf.commons.email.Email.java
private void prepare(final org.jsoup.nodes.Document doc) { removeComments(doc);//Remove any comments from the html of the message to reduce the size //Change the title to match the subject of the email if (doc.getElementsByTag("title").size() > 0) doc.getElementsByTag("title").first().html(getSubject()); //Replace the contents of any tags with class="date" with the current date if (doc.getElementsByClass("date").size() > 0) { for (org.jsoup.nodes.Element datelem : doc.getElementsByClass("date")) { SimpleDateFormat df = new SimpleDateFormat("MMMMMMMMMM d, yyyy"); if (datelem.hasAttr("format")) { try { df = new SimpleDateFormat(datelem.attr("format")); } catch (Exception ee) { } //throw it away and just go back to the default format; datelem.html(df.format(TimeUtils.getGMTtime())); }/*www . j a va 2 s . c o m*/ } } //tables need the border-spacing: style attribute; added for GMail compatiblity for (org.jsoup.nodes.Element tbl : doc.getElementsByTag("table")) if (!tbl.attr("style").contains("border-spacing:")) tbl.attr("style", tbl.attr("style") + (!tbl.attr("style").endsWith(";") ? ";" : "") + "border-spacing:0;"); }
From source file:mml.handler.post.MMLPostHTMLHandler.java
/**
 * Parse a codeblock: appends its text to the output buffer and records a range covering it.
 * The range is named after the element's class attribute, or "pre" when that is empty.
 * @param elem the element to parse
 * @throws JSONException if one of the delegated parsing or bookkeeping calls fails
 */
private void parsePre(Element elem) throws JSONException {
    if (elem.hasText()) {
        final int start = sb.length();
        String rangeName = elem.attr("class");
        if (rangeName == null || rangeName.length() == 0)
            rangeName = "pre";
        Range range = new Range(rangeName, start, 0);
        stil.add(range);
        if (!elem.hasAttr("class")) {
            // no class attribute: take the plain text of the element
            sb.append(elem.text());
        } else {
            // class attribute present: walk the children so nested markup is parsed too
            for (Node child : elem.childNodes()) {
                if (child instanceof TextNode) {
                    sb.append(((TextNode) child).getWholeText());
                } else if (child instanceof Element) {
                    Element childElement = (Element) child;
                    if (child.nodeName().equals("span"))
                        parseSpan(childElement);
                    else
                        parseOtherElement(childElement);
                }
            }
        }
        // the range length is however much text was appended since it was opened
        this.stil.updateLen(range, sb.length() - start);
    }
    prevWasMilestone = false;
    ensure(1, false);
}
From source file:de.geeksfactory.opacclient.apis.Pica.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException { Document doc = Jsoup.parse(html); updateSearchSetValue(doc);/*from ww w . j a v a2 s. co m*/ if (doc.select(".error").size() > 0) { String error = doc.select(".error").first().text().trim(); if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found") || error.equals("Er is niets gevonden.") || error.equals("Rien n'a t trouv.")) { // nothing found return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); } else { // error throw new OpacErrorException(error); } } reusehtml = html; int results_total; String resultnumstr = doc.select(".pages").first().text(); Pattern p = Pattern.compile("[0-9]+$"); Matcher m = p.matcher(resultnumstr); if (m.find()) { resultnumstr = m.group(); } if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } else { results_total = Integer.parseInt(resultnumstr); } List<SearchResult> results = new ArrayList<>(); if (results_total == 1) { // Only one result DetailledItem singleResult = parse_result(html); SearchResult sr = new SearchResult(); sr.setType(getMediaTypeInSingleResult(html)); sr.setInnerhtml( "<b>" + singleResult.getTitle() + "</b><br>" + singleResult.getDetails().get(0).getContent()); results.add(sr); } Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]"); // identifier = null; Elements links = doc.select("table[summary=hitlist] a"); boolean haslink = false; for (int i = 0; i < links.size(); i++) { Element node = links.get(i); if (node.hasAttr("href") & node.attr("href").contains("SHW?") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href")), getDefaultEncoding()); for (NameValuePair nv : anyurl) { if 
(nv.getName().equals("identifier")) { // identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select("td.hit img").size() > 0) { String[] fparts = tr.select("td img").get(0).attr("src").split("/"); String fname = fparts[fparts.length - 1]; if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", ""))); } } else { sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", ""))); } } Element middlething = tr.child(2); List<Node> children = middlething.childNodes(); int childrennum = children.size(); List<String[]> strings = new ArrayList<>(); for (int ch = 0; ch < childrennum; ch++) { Node node = children.get(ch); if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (text.length() > 3) { strings.add(new String[] { "text", "", text }); } } else if (node instanceof Element) { List<Node> subchildren = node.childNodes(); for (int j = 0; j < subchildren.size(); j++) { Node subnode = subchildren.get(j); if (subnode instanceof TextNode) { String text = ((TextNode) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") }); } } else if (subnode instanceof Element) { String text = ((Element) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") }); } } } } } StringBuilder description = new StringBuilder(); int k = 0; for (String[] part : strings) { if 
(part[0].equals("a") && k == 0) { description.append("<b>").append(part[2]).append("</b>"); } else if (k < 3) { description.append("<br />").append(part[2]); } k++; } sr.setInnerhtml(description.toString()); sr.setNr(10 * (page - 1) + i); sr.setId(null); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }
From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java
/** * //from ww w . j a va 2s .c o m * * @param e * @param node * @return */ public Element filter(Element e, Node node) throws NoSuchFunctionException, NoSuchAxisException { if (node.getTagName().equals("*") || node.getTagName().equals(e.nodeName())) { if (node.getPredicate() != null && StringUtils.isNotBlank(node.getPredicate().getValue())) { Predicate p = node.getPredicate(); if (p.getOpEm() == null) { if (p.getValue().matches("\\d+") && getElIndex(e) == Integer.parseInt(p.getValue())) { return e; } else if (p.getValue().endsWith("()") && (Boolean) callFilterFunc(p.getValue().substring(0, p.getValue().length() - 2), e)) { return e; } else if (p.getValue().startsWith("@") && e.hasAttr(StringUtils.substringAfter(p.getValue(), "@"))) { return e; } //todo p.value ~= contains(./@href,'renren.com') } else { if (p.getLeft().matches("[^/]+\\(\\)")) { Object filterRes = p.getOpEm().excute( callFilterFunc(p.getLeft().substring(0, p.getLeft().length() - 2), e).toString(), p.getRight()); if (filterRes instanceof Boolean && (Boolean) filterRes) { return e; } else if (filterRes instanceof Integer && e.siblingIndex() == Integer.parseInt(filterRes.toString())) { return e; } } else if (p.getLeft().startsWith("@")) { String lValue = e.attr(p.getLeft().substring(1)); Object filterRes = p.getOpEm().excute(lValue, p.getRight()); if ((Boolean) filterRes) { return e; } } else { // ???xpath? List<Element> eltmp = new LinkedList<Element>(); eltmp.add(e); List<JXNode> rstmp = evaluate(p.getLeft(), new Elements(eltmp)); if ((Boolean) p.getOpEm().excute(StringUtils.join(rstmp, ""), p.getRight())) { return e; } } } } else { return e; } } return null; }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
@Override public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException { start();//from w w w .j av a2 s. c om LoginResponse login = login(acc); if (!login.success) { return null; } AccountData adata = new AccountData(acc.getId()); if (login.warning != null) { adata.setWarning(login.warning); } // Lent media httpGet(opac_url + "/userAccount.do?methodToCall=start", ENCODING); String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=loaned", ENCODING); List<LentItem> lent = new ArrayList<>(); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<LentItem> nextpageLent = parse_medialist(doc); if (nextpageLent != null) { lent.addAll(nextpageLent); } if (doc.select(".pagination").size() > 0 && lent != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageLent = parse_medialist(doc); if (nextpageLent != null) { lent.addAll(nextpageLent); } } } adata.setLent(lent); // Requested media ("Vormerkungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=requested", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<ReservedItem> requested = new ArrayList<>(); List<ReservedItem> nextpageRes = parse_reslist(doc); if (nextpageRes != null) { requested.addAll(nextpageRes); } if (doc.select(".pagination").size() > 0 && requested != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageRes = parse_reslist(doc); if (nextpageRes != null) { requested.addAll(nextpageRes); } } } // Ordered media ("Bestellungen") html = 
httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&accountTyp=ordered", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); List<ReservedItem> nextpageOrd = parse_reslist(doc); if (nextpageOrd != null) { requested.addAll(nextpageOrd); } if (doc.select(".pagination").size() > 0 && requested != null) { Element pagination = doc.select(".pagination").first(); Elements pages = pagination.select("a"); for (Element page : pages) { if (!page.hasAttr("href")) { continue; } html = httpGet(page.attr("abs:href"), ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); nextpageOrd = parse_reslist(doc); if (nextpageOrd != null) { requested.addAll(nextpageOrd); } } } adata.setReservations(requested); // Fees if (doc.select("#fees").size() > 0) { String text = doc.select("#fees").first().text().trim(); if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) { text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2"); adata.setPendingFees(text); } } return adata; }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException { Document doc = Jsoup.parse(html); if (doc.select("#RefineHitListForm").size() > 0) { // the results are located on a different page loaded via AJAX html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000) + "&hitlistindex=0&exclusionList=", ENCODING); doc = Jsoup.parse(html);//w ww . j a v a 2 s. c o m } if (doc.select(".nodata").size() > 0) { return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); } doc.setBaseUri(opac_url + "/searchfoo"); int results_total = -1; String resultnumstr = doc.select(".box-header h2").first().text(); if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) { reusehtml = html; throw new OpacErrorException("is_a_redirect"); } else if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } Elements table = doc.select("table.data > tbody > tr"); identifier = null; Elements links = doc.select("table.data a"); boolean haslink = false; for (Element node : links) { if (node.hasAttr("href") & node.attr("href").contains("singleHit.do") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils .parse(new URI(node.attr("href").replace(" ", "%20").replace("&", "&")), ENCODING); for (NameValuePair nv : anyurl) { if (nv.getName().equals("identifier")) { identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } List<SearchResult> results = new ArrayList<>(); for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select(".icn, img[width=32]").size() > 0) { String[] fparts = tr.select(".icn, img[width=32]").first().attr("src").split("/"); String fname = fparts[fparts.length - 1]; 
String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "") .replace(".png", ""); // File names can look like this: "20_DVD_Video.gif" Pattern pattern = Pattern.compile("(\\d+)_.*"); Matcher matcher = pattern.matcher(changedFname); if (matcher.find()) { changedFname = matcher.group(1); } MediaType defaulttype = defaulttypes.get(changedFname); if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttype); } } else { sr.setType(defaulttype); } } String title; String text; if (tr.select(".results table").size() > 0) { // e.g. RWTH Aachen title = tr.select(".title a").text(); text = tr.select(".title div").text(); } else { // e.g. Schaffhausen, BSB Mnchen title = tr.select(".title, .hitlistTitle").text(); text = tr.select(".results, .hitlistMetadata").first().ownText(); } // we need to do some evil javascript parsing here to get the cover // and loan status of the item // get cover if (tr.select(".cover script").size() > 0) { String js = tr.select(".cover script").first().html(); String isbn = matchJSVariable(js, "isbn"); String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (!"".equals(isbn) && !"".equals(ajaxUrl)) { String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String coverUrl = httpGet(url + "?isbn=" + isbn + "&size=small", ENCODING); if (!"".equals(coverUrl)) { sr.setCover(coverUrl.replace("\r\n", "").trim()); } } } // get loan status and media ID if (tr.select("div[id^=loanstatus] + script").size() > 0) { String js = tr.select("div[id^=loanstatus] + script").first().html(); String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier", "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit", "context" }; String ajaxUrl = matchJSVariable(js, "ajaxUrl"); if (!"".equals(ajaxUrl)) { JSONObject id = new JSONObject(); 
List<NameValuePair> map = new ArrayList<>(); for (String variable : variables) { String value = matchJSVariable(js, variable); if (!"".equals(value)) { map.add(new BasicNameValuePair(variable, value)); } try { if (variable.equals("itemIdentifier")) { id.put("id", value); } else if (variable.equals("loanstateDBId")) { id.put("db", value); } } catch (JSONException e) { e.printStackTrace(); } } sr.setId(id.toString()); String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString(); String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING) .replace("\r\n", "").trim(); Document loanStatusDoc = Jsoup.parse(loanStatusHtml); String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim(); if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung mglich") || loanstatus.contains("Keine Exemplare verfgbar"))) { sr.setStatus(SearchResult.Status.RED); } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) { sr.setStatus(SearchResult.Status.YELLOW); } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar")) || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar")) || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar")) || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar")) || (loanstatus.contains("heute zurckgebucht")) || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) { sr.setStatus(SearchResult.Status.GREEN); } if (sr.getType() != null) { if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) // Especially Onleihe.de ebooks are often marked // green though they are not available. 
{ sr.setStatus(SearchResult.Status.UNKNOWN); } } } } sr.setInnerhtml(("<b>" + title + "</b><br/>") + text); sr.setNr(10 * (page - 1) + i + 1); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
/**
 * Performs one step of the multi-step reservation of an item.
 *
 * <p>Depending on {@code useraction} this either sends the final confirmation, opens the reservation
 * page (logging in when the page asks for it) and possibly asks for a branch, or submits a
 * previously selected branch. When the resulting page targets "makevorbest", the details shown on
 * it are returned so that the caller can ask for confirmation.
 *
 * @param item the item to reserve; its reservation info is appended to the OPAC URL
 * @param acc the account whose name and password are used when a login form is shown
 * @param useraction the step to perform ({@code MultiStepResult.ACTION_CONFIRMATION},
 *        {@code ReservationResult.ACTION_BRANCH} or 0 for the first call)
 * @param selection the value chosen in the previous step, or {@code null} on the first call
 * @return OK after a confirmation, SELECTION_NEEDED with the branch list, CONFIRMATION_NEEDED with
 *         the details, or ERROR (with the page's message when there is exactly one)
 * @throws IOException if an HTTP request fails
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    String reservation_info = item.getReservation_info();

    Document doc = null;

    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        // final step: confirm the reservation and report success without inspecting the response
        List<NameValuePair> nameValuePairs = new ArrayList<>(2);
        nameValuePairs.add(new BasicNameValuePair("make_allvl", "Bestaetigung"));
        nameValuePairs.add(new BasicNameValuePair("target", "makevorbest"));
        httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding());
        return new ReservationResult(MultiStepResult.Status.OK);
    } else if (selection == null || useraction == 0) {
        // first step: open the reservation page
        String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding());
        doc = Jsoup.parse(html);

        if (doc.select("input[name=AUSWEIS]").size() > 0) {
            // Needs login
            List<NameValuePair> nameValuePairs = new ArrayList<>(2);
            nameValuePairs.add(new BasicNameValuePair("AUSWEIS", acc.getName()));
            nameValuePairs.add(new BasicNameValuePair("PWD", acc.getPassword()));
            if (data.has("db")) {
                try {
                    nameValuePairs.add(new BasicNameValuePair("vkontodb", data.getString("db")));
                } catch (JSONException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
            nameValuePairs.add(new BasicNameValuePair("B1", "weiter"));
            nameValuePairs.add(new BasicNameValuePair("target", doc.select("input[name=target]").val()));
            nameValuePairs.add(new BasicNameValuePair("type", "VT2"));
            html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs),
                    getDefaultEncoding());
            doc = Jsoup.parse(html);
        }
        // fall back to the "VZST" select when the configured branch field is not on the page;
        // the field keeps this value for later calls
        if (doc.select("select[name=" + branch_inputfield + "]").size() == 0) {
            if (doc.select("select[name=VZST]").size() > 0) {
                branch_inputfield = "VZST";
            }
        }
        if (doc.select("select[name=" + branch_inputfield + "]").size() > 0) {
            // a branch has to be chosen: return the options as key/value pairs
            List<Map<String, String>> branches = new ArrayList<>();
            for (Element option : doc.select("select[name=" + branch_inputfield + "]").first().children()) {
                String value = option.text().trim();
                String key;
                // options without a value attribute use their label as key
                if (option.hasAttr("value")) {
                    key = option.attr("value");
                } else {
                    key = value;
                }
                Map<String, String> selopt = new HashMap<>();
                selopt.put("key", key);
                selopt.put("value", value);
                branches.add(selopt);
            }
            // remembered in a field because the branch step below posts it back as "target"
            _res_target = doc.select("input[name=target]").attr("value");
            ReservationResult result = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
            result.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            result.setSelection(branches);
            return result;
        }
    } else if (useraction == ReservationResult.ACTION_BRANCH) {
        // branch step: submit the selected branch together with the remembered target
        List<NameValuePair> nameValuePairs = new ArrayList<>(2);
        nameValuePairs.add(new BasicNameValuePair(branch_inputfield, selection));
        nameValuePairs.add(new BasicNameValuePair("button2", "weiter"));
        nameValuePairs.add(new BasicNameValuePair("target", _res_target));

        String html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs),
                getDefaultEncoding());
        doc = Jsoup.parse(html);
    }

    // no branch above produced a page (e.g. an unhandled useraction)
    if (doc == null) {
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    if (doc.select("input[name=target]").size() > 0) {
        if (doc.select("input[name=target]").attr("value").equals("makevorbest")) {
            // the page asks for confirmation: collect the details to show to the user
            List<String[]> details = new ArrayList<>();

            if (doc.getElementsByClass("kontomeldung").size() == 1) {
                details.add(new String[] { doc.getElementsByClass("kontomeldung").get(0).text().trim() });
            }
            Pattern p = Pattern.compile("geb.hr", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
            for (Element div : doc.select(".kontozeile_center")) {
                // <br> tags are replaced by the marker "br2n" so that the text can still be split
                // into lines after the remaining HTML has been stripped
                for (String text : Jsoup.parse(div.html().replaceAll("(?i)<br[^>]*>", "br2n")).text()
                        .split("br2n")) {
                    if (p.matcher(text).find() && !text.contains("usstehend")
                            && text.contains("orbestellung")) {
                        details.add(new String[] { text.trim() });
                    }
                }
            }

            if (doc.select("#vorbest").size() > 0 && doc.select("#vorbest").val().contains("(")) {
                // Erlangen uses "Kostenpflichtige Vorbestellung (1 Euro)"
                // as the label of its reservation button
                details.add(new String[] { doc.select("#vorbest").val().trim() });
            }

            // rows with exactly one label cell and one content cell become label/value details
            for (Element row : doc.select(".kontozeile_center table tr")) {
                if (row.select(".konto_feld").size() == 1 && row.select(".konto_feldinhalt").size() == 1) {
                    details.add(new String[] { row.select(".konto_feld").text().trim(),
                            row.select(".konto_feldinhalt").text().trim() });
                }
            }

            ReservationResult result = new ReservationResult(MultiStepResult.Status.CONFIRMATION_NEEDED);
            result.setDetails(details);
            return result;
        }
    }

    // anything else is an error; use the page's message when there is exactly one
    if (doc.getElementsByClass("kontomeldung").size() == 1) {
        return new ReservationResult(MultiStepResult.Status.ERROR,
                doc.getElementsByClass("kontomeldung").get(0).text());
    }

    return new ReservationResult(MultiStepResult.Status.ERROR,
            stringProvider.getString(StringProvider.UNKNOWN_ERROR));
}