Usage examples for org.jsoup.nodes.Element.select
public Elements select(String cssQuery)
From source file:org.shareok.data.sagedata.SageJournalIssueDateProcessor.java
public Map<String, Map<String, String>> updateSageJournalLinks(Map<String, Map<String, String>> journalMap) { Document doc = null;/*from w w w . j a v a 2s.c o m*/ try { doc = Jsoup.connect("http://journals.sagepub.com/action/showPublications?pageSize=20&startPage=199") .userAgent( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36") .cookie("auth", "token").timeout(300000).get(); Elements trs = doc.select("form#browsePublicationsForm").get(0).select("table").get(0).select("tbody") .get(0).select("tr"); for (Element tr : trs) { Element link = tr.select("td").get(1).select("a").get(0); String journalName = link.text(); String journalLink = SageDataUtil.SAGE_HTTP_PREFIX + link.attr("href"); String[] linkInfo = journalLink.split("/"); String journalIssuesLink = SageDataUtil.SAGE_HTTP_PREFIX + "/loi/" + linkInfo[linkInfo.length - 1]; if (null == journalMap.get(journalName)) { Map<String, String> infoMap = new HashMap<>(); infoMap.put("homeLink", journalLink); infoMap.put("issueLink", journalIssuesLink); journalMap.put(journalName, infoMap); } else { Map<String, String> infoMap = journalMap.get(journalName); if (null == infoMap.get("homeLink")) { infoMap.put("homeLink", journalLink); } if (null == infoMap.get("issueLink")) { infoMap.put("issueLink", journalIssuesLink); } } } } catch (Exception ex) { ex.printStackTrace(); } return journalMap; }
From source file:com.crawler.app.run.CrawlSite.java
@Override public void visit(Page page) { String url = page.getWebURL().getURL(); // logger.info("URL: ", url); if (ReadXmlConfig() && readXmlConfigDatabase()) { status_read_xml = true;//from www.j a v a2 s. c o m } else { return; } System.out.println("\n URL visit: " + url); if (page.getParseData() instanceof HtmlParseData) { HtmlParseData htmlParseData = (HtmlParseData) page.getParseData(); String text = htmlParseData.getText(); String html = htmlParseData.getHtml(); String title = htmlParseData.getTitle(); Document doc = Jsoup.parse(html, "UTF-8"); Element body = doc.body(); Elements listDetail = body.select(bodySelect); Integer i = 0; Integer siteID = siteIDXML; Integer provinceID = 1; MysqlCrawler.createConn(host, port, dbName, dbUser, dbPwd); String jobImage, jobUrl, aJobName, cJobLocation = null, cLocationNear = "", bJobCompany = "", dJobCareer, eJobSalary, gJobDescription, gJobDetailShort, gJobDetail, jobDetailImage, jobDetailImageName, hJobExpire = null; for (Element detail : listDetail) { i++; try { jobImage = ""; /* job img */ if (!jobImgQuery.isEmpty()) { if (jobImagePosition > -1) { if (jobImagePosition < detail.select(jobImgQuery).size()) { if (!detail.select(jobImgQuery).get(jobImagePosition).attr(jobImageFormatAttr) .isEmpty()) { if (!jobImgUrl.isEmpty()) { if (JobImageSelectPosition.isEmpty()) { jobImage = jobImgUrl + detail.select(jobImgQuery).get(jobImagePosition) .attr(jobImageFormatAttr); } else { jobImage = jobImgUrl + detail.select(jobImgQuery).get(jobImagePosition) .select(JobImageSelectPosition).attr(jobImageFormatAttr); } } else { if (JobImageSelectPosition.isEmpty()) { jobImage = detail.select(jobImgQuery).get(jobImagePosition) .attr(jobImageFormatAttr); } else { jobImage = detail.select(jobImgQuery).get(jobImagePosition) .select(JobImageSelectPosition).attr(jobImageFormatAttr); } } } } } else { if (!detail.select(jobImgQuery).attr(jobImageFormatAttr).isEmpty()) { if (!jobImgUrl.isEmpty()) { jobImage = jobImgUrl + 
detail.select(jobImgQuery).first().attr(jobImageFormatAttr); } else { jobImage = detail.select(jobImgQuery).first().attr(jobImageFormatAttr); } } } } /* job url */ jobUrl = ""; if (!jobUrlQuery.isEmpty()) { if (jobUrlPosition > -1) { if (jobUrlPosition < detail.select(jobUrlQuery).size()) { if (!joburl_url.isEmpty()) { if (JobUrlSelectPosition.isEmpty()) { jobUrl = joburl_url + detail.select(jobUrlQuery).get(jobUrlPosition) .attr(jobUrlFormatAttr); } else { jobUrl = joburl_url + detail.select(jobUrlQuery).get(jobUrlPosition) .select(JobUrlSelectPosition).attr(jobUrlFormatAttr); } } else { if (JobUrlSelectPosition.isEmpty()) { jobUrl = detail.select(jobUrlQuery).get(jobUrlPosition) .attr(jobUrlFormatAttr); } else { jobUrl = detail.select(jobUrlQuery).get(jobUrlPosition) .select(JobUrlSelectPosition).attr(jobUrlFormatAttr); } } } } else { if (!joburl_url.isEmpty()) { jobUrl = joburl_url + detail.select(jobUrlQuery).first().attr(jobUrlFormatAttr); } else { jobUrl = detail.select(jobUrlQuery).first().attr(jobUrlFormatAttr); } } } // change org.jsoup.nodes.Element detailJobUrl = convertUrlToDocument(jobUrl); //System.out.print(detailJobUrl); //System.exit(1); /* job location */ if (!jobLocationQuery.isEmpty()) { if (jobLocationFormatData.toUpperCase().equals("TEXT")) { cJobLocation = detailJobUrl.select(jobLocationQuery).text(); } else if (jobLocationFormatData.toUpperCase().equals("HTML")) { cJobLocation = detailJobUrl.select(jobLocationQuery).html(); } } /* job name */ aJobName = ""; if (jobNameFormatData.toUpperCase().equals("TEXT")) { aJobName = detailJobUrl.select(jobNameQuery).text(); } else if (jobNameFormatData.toUpperCase().equals("HTML")) { aJobName = detailJobUrl.select(jobNameQuery).html(); } /* job description */ gJobDescription = ""; if (!JobDescriptionQuery.isEmpty()) { if (jobDescriptionFormatData.toUpperCase().equals("TEXT")) { gJobDescription = detailJobUrl.select(JobDescriptionQuery).text(); } else if 
(jobDescriptionFormatData.toUpperCase().equals("HTML")) { gJobDescription = detailJobUrl.select(JobDescriptionQuery).html(); } } /* job detail short */ gJobDetailShort = ""; if (!JobDetailShortQuery.isEmpty()) { if (jobDetailShortFormatData.toUpperCase().equals("TEXT")) { gJobDetailShort = detailJobUrl.select(JobDetailShortQuery).text(); } else if (jobDetailShortFormatData.toUpperCase().equals("HTML")) { gJobDetailShort = detailJobUrl.select(JobDetailShortQuery).html(); } } /* job detail */ gJobDetail = ""; if (!JobDetailQuery.isEmpty()) { if (jobDetailFormatData.toUpperCase().equals("TEXT")) { gJobDetail = detailJobUrl.select(JobDetailQuery).text(); } else if (jobDetailFormatData.toUpperCase().equals("HTML")) { gJobDetail = detailJobUrl.select(JobDetailQuery).html(); } } /* job detail img*/ jobDetailImage = ""; jobDetailImageName = ""; if (!jobDetailImgQuery.isEmpty()) { if (jobDetailImagePosition > -1) { if (jobDetailImagePosition < detailJobUrl.select(jobDetailImgQuery).size()) { if (!detailJobUrl.select(jobDetailImgQuery).get(jobDetailImagePosition) .attr(jobDetailImageFormatAttr).isEmpty()) { if (!jobDetailImgUrl.isEmpty()) { if (JobDetailImageSelectPosition.isEmpty()) { jobDetailImage = jobDetailImgUrl + detailJobUrl .select(jobDetailImgQuery).get(jobDetailImagePosition) .attr(jobDetailImageFormatAttr); } else { jobDetailImage = jobDetailImgUrl + detailJobUrl .select(jobDetailImgQuery).get(jobDetailImagePosition) .select(JobDetailImageSelectPosition) .attr(jobDetailImageFormatAttr); } } else { if (JobDetailImageSelectPosition.isEmpty()) { jobDetailImage = detailJobUrl.select(jobDetailImgQuery) .get(jobDetailImagePosition).attr(jobDetailImageFormatAttr); } else { jobDetailImage = detailJobUrl.select(jobDetailImgQuery) .get(jobDetailImagePosition) .select(JobDetailImageSelectPosition) .attr(jobDetailImageFormatAttr); } } } } } else { if (!detailJobUrl.select(jobDetailImgQuery).attr(jobDetailImageFormatAttr).isEmpty()) { if (!jobDetailImgUrl.isEmpty()) { 
jobDetailImage = jobDetailImgUrl + detailJobUrl.select(jobDetailImgQuery) .first().attr(jobDetailImageFormatAttr); } else { jobDetailImage = detailJobUrl.select(jobDetailImgQuery).first() .attr(jobDetailImageFormatAttr); } } } if (!jobDetailImage.isEmpty()) { jobDetailImageName = DownloadImage.downloadImage(jobDetailImage, "D:\\/Java\\/storage"); } } /* job location near */ cLocationNear = ""; if (!locationNearQuery.isEmpty()) { if (locationNearFormatData.toUpperCase().equals("TEXT")) { cLocationNear = detailJobUrl.select(locationNearQuery).text(); } else if (locationNearFormatData.toUpperCase().equals("HTML")) { cLocationNear = detailJobUrl.select(locationNearQuery).html(); } } /* job salary */ eJobSalary = ""; if (!JobSalaryQuery.isEmpty()) { if (jobSalaryFormatData.toUpperCase().equals("TEXT")) { eJobSalary = detailJobUrl.select(JobSalaryQuery).text(); } else if (jobSalaryFormatData.toUpperCase().equals("HTML")) { eJobSalary = detailJobUrl.select(JobSalaryQuery).html(); } } /* job expire */ hJobExpire = ""; if (!JobExpireQuery.isEmpty()) { if (jobExpireFormatData.toUpperCase().equals("TEXT")) { hJobExpire = detailJobUrl.select(JobExpireQuery).text(); } else if (jobExpireFormatData.toUpperCase().equals("HTML")) { hJobExpire = detailJobUrl.select(JobExpireQuery).html(); } } /* job company */ bJobCompany = ""; if (!JobCompanyQuery.isEmpty()) { if (jobCompanyFormatData.toUpperCase().equals("TEXT")) { bJobCompany = detailJobUrl.select(JobCompanyQuery).text(); } else if (jobCompanyFormatData.toUpperCase().equals("HTML")) { bJobCompany = detailJobUrl.select(JobCompanyQuery).html(); } } /* job type */ String fJobType = ""; if (!JobTypeQuery.isEmpty()) { if (jobTypeFormatData.toUpperCase().equals("TEXT")) { fJobType = detailJobUrl.select(JobTypeQuery).text(); } else if (jobTypeFormatData.toUpperCase().equals("HTML")) { fJobType = detailJobUrl.select(JobTypeQuery).html(); } } /* job address */ String jobAddress = ""; if (!JobAddressQuery.isEmpty()) { if 
(jobAddressFormatData.toUpperCase().equals("TEXT")) { jobAddress = detailJobUrl.select(JobAddressQuery).text(); } else if (jobAddressFormatData.toUpperCase().equals("HTML")) { jobAddress = detailJobUrl.select(JobAddressQuery).html(); } } dJobCareer = ""; if (!JobCareerQuery.isEmpty()) { if (jobCareerFormatData.toUpperCase().equals("TEXT")) { dJobCareer = detailJobUrl.select(JobCareerQuery).text(); } else if (jobCareerFormatData.toUpperCase().equals("HTML")) { dJobCareer = detailJobUrl.select(JobCareerQuery).html(); } } System.out.println("\n Url : " + jobUrl); System.out.println("\n Image : " + jobImage); System.out.println("\n Title : " + aJobName); System.out.println("\n Title SEO : " + StringUtils.removeAccent(aJobName)); //System.out.println("\n Location : " + cJobLocation + "\n" // + cLocationNear); System.out.println("\n jobDetailImageName : " + jobDetailImageName); // System.out.println("\n Detail : " + gJobDetail); // System.out.println("\n Salary : " + eJobSalary); // System.out.println("\n expire Date : " + hJobExpire); // System.out.println("\n Company : " + bJobCompany); // System.out.println("\n JobType : " + fJobType); // System.out.println("\n Full I : " + i); String news_title = aJobName; String news_title_seo = StringUtils.removeAccent(aJobName); String news_meta = aJobName; String news_description = gJobDescription; String news_tag = aJobName.replace(" ", ", "); String news_pic = jobDetailImageName; String pic_note = aJobName; String news_subcontent = "<p>" + gJobDescription + "</p>"; String news_content = gJobDetailShort + "<p><img src='http://" + jobDetailImageName + "'></p>" + gJobDetail; int type = 4; int status = 0; int kind = 0; String source = "Theo http://monngonmoingay.com"; String author = null; int user_posted = 0; int user_activated = 0; int cate_id = 43; String list_productid_relation = "13,28,30"; if (!MysqlCrawler.getInstance().checkNewsUrl(news_title_seo)) { MysqlCrawler.getInstance().insertNewsContent(news_title, news_title_seo, 
news_meta, news_description, news_tag, news_pic, pic_note, news_subcontent, news_content, type, status, kind, source, author, user_posted, user_activated, cate_id, list_productid_relation); } // System.exit(1); } catch (Exception ex) { System.out.println("\n Fail I : " + i); System.out.println("\n Ex : " + ex); } } } /* * Header[] responseHeaders = page.getFetchResponseHeaders(); if * (responseHeaders != null) { logger.debug("Response headers:"); for * (Header header : responseHeaders) { logger.debug("\t{}: {}", * header.getName(), header.getValue()); } } */ logger.debug("============="); }
From source file:de.geeksfactory.opacclient.apis.IOpac.java
protected DetailledItem parse_result(String html) throws IOException { Document doc = Jsoup.parse(html); DetailledItem result = new DetailledItem(); String id = null;//from w w w .j av a 2 s . c o m if (doc.select("input[name=mednr]").size() > 0) { id = doc.select("input[name=mednr]").first().val().trim(); } else if (doc.select("a[href*=mednr]").size() > 0) { String href = doc.select("a[href*=mednr]").first().attr("href"); id = getQueryParamsFirst(href).get("mednr").trim(); } result.setId(id); // check if new share button is available (allows to share a link to the standard // frameset of the OPAC instead of only the detail frame) newShareLinks = doc.select("#sharebutton").size() > 0; Elements table = doc.select("table").get(1).select("tr"); // GET COVER IMAGE String imgUrl = table.get(0) .select("img[src~=^https?://(:?images(?:-[^\\.]*)?\\.|[^\\.]*\\" + ".images-)amazon\\.com]") .attr("src"); result.setCover(imgUrl); // GET INFORMATION Copy copy = new Copy(); for (Element element : table) { String detail = element.select("td").text().trim().replace("\u00a0", ""); String title = element.select("th").text().trim().replace("\u00a0", ""); if (!title.equals("")) { if (title.contains("verliehen bis")) { if (detail.equals("")) { copy.setStatus("verfgbar"); } else { copy.setStatus("verliehen bis " + detail); } } else if (title.contains("Abteilung")) { copy.setDepartment(detail); } else if (title.contains("Signatur")) { copy.setShelfmark(detail); } else if (title.contains("Titel")) { result.setTitle(detail); } else if (!title.contains("Cover")) { result.addDetail(new Detail(title, detail)); } } } // GET RESERVATION INFO if ("verfgbar".equals(copy.getStatus()) || doc.select("a[href^=/cgi-bin/di.exe?mode=10], input.resbutton").size() == 0) { result.setReservable(false); } else { result.setReservable(true); if (doc.select("a[href^=/cgi-bin/di.exe?mode=10]").size() > 0) { // Reservation via link 
result.setReservation_info(doc.select("a[href^=/cgi-bin/di.exe?mode=10]").first().attr("href") .substring(1).replace(" ", "")); } else { // Reservation via form (method="get") Element form = doc.select("input.resbutton").first().parent(); result.setReservation_info(generateQuery(form)); } } if (copy.notEmpty()) result.addCopy(copy); return result; }
From source file:de.geeksfactory.opacclient.apis.IOpac.java
@Override public ProlongResult prolong(String media, Account account, int useraction, String Selection) throws IOException { // internal convention: We add "NEW" to the media ID to show that we have the new iOPAC // version//from w ww . ja va 2 s.c o m if (media.startsWith("NEW")) { String mediaNr = media.substring(3); String html = httpGet( opac_url + "/cgi-bin/di.exe?mode=42&MedNrVerlAll=" + URLEncoder.encode(mediaNr, "UTF-8"), getDefaultEncoding()); Document doc = Jsoup.parse(html); if (doc.text().contains("1 Medium wurde verl")) { return new ProlongResult(MultiStepResult.Status.OK); } else { return new ProlongResult(MultiStepResult.Status.ERROR, doc.text()); } } else { String html = httpGet(opac_url + "/" + media, getDefaultEncoding()); Document doc = Jsoup.parse(html); if (doc.select("table th").size() > 0) { if (doc.select("h1").size() > 0) { if (doc.select("h1").first().text().contains("Hinweis")) { return new ProlongResult(MultiStepResult.Status.ERROR, doc.select("table th").first().text()); } } try { Element form = doc.select("form[name=form1]").first(); String sessionid = form.select("input[name=sessionid]").attr("value"); String mednr = form.select("input[name=mednr]").attr("value"); httpGet(opac_url + "/cgi-bin/di.exe?mode=8&kndnr=" + account.getName() + "&mednr=" + mednr + "&sessionid=" + sessionid + "&psh100=Verl%C3%A4ngern", getDefaultEncoding()); return new ProlongResult(MultiStepResult.Status.OK); } catch (Throwable e) { e.printStackTrace(); return new ProlongResult(MultiStepResult.Status.ERROR); } } return new ProlongResult(MultiStepResult.Status.ERROR); } }
From source file:de.geeksfactory.opacclient.apis.Zones.java
private DetailledItem parse_result(String id, String html) { Document doc = Jsoup.parse(html); DetailledItem result = new DetailledItem(); result.setTitle(""); boolean title_is_set = false; result.setId(id);//from w w w .j ava2 s . com String detailTrsQuery = version18 ? ".inRoundBox1 table table tr" : ".DetailDataCell table table:not(.inRecordHeader) tr"; Elements detailtrs1 = doc.select(detailTrsQuery); for (int i = 0; i < detailtrs1.size(); i++) { Element tr = detailtrs1.get(i); int s = tr.children().size(); if (tr.child(0).text().trim().equals("Titel") && !title_is_set) { result.setTitle(tr.child(s - 1).text().trim()); title_is_set = true; } else if (s > 1) { Element valchild = tr.child(s - 1); if (valchild.select("table").isEmpty()) { String val = valchild.text().trim(); if (val.length() > 0) { result.addDetail(new Detail(tr.child(0).text().trim(), val)); } } } } for (Element a : doc.select("a.SummaryActionLink")) { if (a.text().contains("Vormerken")) { result.setReservable(true); result.setReservation_info(a.attr("href")); } } Elements detaildiv = doc.select("div.record-item-new"); if (!detaildiv.isEmpty()) { for (int i = 0; i < detaildiv.size(); i++) { Element dd = detaildiv.get(i); String text = ""; for (Node node : dd.childNodes()) { if (node instanceof TextNode) { String snip = ((TextNode) node).text(); if (snip.length() > 0) { text += snip; } } else if (node instanceof Element) { if (((Element) node).tagName().equals("br")) { text += "\n"; } else { String snip = ((Element) node).text().trim(); if (snip.length() > 0) { text += snip; } } } } result.addDetail(new Detail("", text)); } } if (doc.select("span.z3988").size() > 0) { // Sometimes there is a <span class="Z3988"> item which provides // data in a standardized format. 
String z3988data = doc.select("span.z3988").first().attr("title").trim(); for (String pair : z3988data.split("&")) { String[] nv = pair.split("=", 2); if (nv.length == 2) { if (!nv[1].trim().equals("")) { if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) { result.setTitle(nv[1]); } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) { result.setTitle(nv[1]); } else if (nv[0].equals("rft.au")) { result.addDetail(new Detail("Author", nv[1])); } } } } } // Cover if (doc.select(".BookCover, .LargeBookCover").size() > 0) { result.setCover(doc.select(".BookCover, .LargeBookCover").first().attr("src")); } Elements copydivs = doc.select("div[id^=stock_]"); String pop = ""; for (int i = 0; i < copydivs.size(); i++) { Element div = copydivs.get(i); if (div.attr("id").startsWith("stock_head")) { pop = div.text().trim(); continue; } Copy copy = new Copy(); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); // This is getting very ugly - check if it is valid for libraries which are not Hamburg. 
// Seems to also work in Kiel (Zones 1.8, checked 10.10.2015) int j = 0; for (Node node : div.childNodes()) { try { if (node instanceof Element) { if (((Element) node).tag().getName().equals("br")) { copy.setBranch(pop); result.addCopy(copy); j = -1; } else if (((Element) node).tag().getName().equals("b") && j == 1) { copy.setLocation(((Element) node).text()); } else if (((Element) node).tag().getName().equals("b") && j > 1) { copy.setStatus(((Element) node).text()); } j++; } else if (node instanceof TextNode) { if (j == 0) { copy.setDepartment(((TextNode) node).text()); } if (j == 2) { copy.setBarcode(((TextNode) node).getWholeText().trim().split("\n")[0].trim()); } if (j == 6) { String text = ((TextNode) node).text().trim(); String date = text.substring(text.length() - 10); try { copy.setReturnDate(fmt.parseLocalDate(date)); } catch (IllegalArgumentException e) { e.printStackTrace(); } } j++; } } catch (Exception e) { e.printStackTrace(); } } } return result; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
private final static void apply(Element target, List<Renderer> rendererList, RenderAction renderAction, int startIndex, int count) { // The renderer list have to be applied recursively because the // transformer will always return a new Element clone. if (startIndex >= count) { return;/*from w ww . ja v a 2 s. com*/ } final Renderer currentRenderer = rendererList.get(startIndex); RendererType rendererType = currentRenderer.getRendererType(); switch (rendererType) { case GO_THROUGH: apply(target, rendererList, renderAction, startIndex + 1, count); return; /* case DEBUG: currentRenderer.getTransformerList().get(0).invoke(target); apply(target, rendererList, renderAction, startIndex + 1, count); return; */ case RENDER_ACTION: ((RenderActionRenderer) currentRenderer).getStyle().apply(renderAction); apply(target, rendererList, renderAction, startIndex + 1, count); return; default: // do nothing break; } String selector = currentRenderer.getSelector(); List<Transformer<?>> transformerList = currentRenderer.getTransformerList(); List<Element> elemList; if (PSEUDO_ROOT_SELECTOR.equals(selector)) { elemList = new LinkedList<Element>(); elemList.add(target); } else { elemList = new ArrayList<>(target.select(selector)); } if (elemList.isEmpty()) { if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) { elemList.add(target); transformerList.clear(); transformerList.add( new RendererTransformer(((ElementNotFoundHandler) currentRenderer).alternativeRenderer())); } else if (renderAction.isOutputMissingSelectorWarning()) { String creationInfo = currentRenderer.getCreationSiteInfo(); if (creationInfo == null) { creationInfo = ""; } else { creationInfo = " at [ " + creationInfo + " ]"; } logger.warn( "There is no element found for selector [{}]{}, if it is deserved, try Renderer#disableMissingSelectorWarning() " + "to disable this message and Renderer#enableMissingSelectorWarning could enable this warning again in " + "your renderer chain", selector, creationInfo); 
apply(target, rendererList, renderAction, startIndex + 1, count); return; } } else { if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) { apply(target, rendererList, renderAction, startIndex + 1, count); return; } } Element delayedElement = null; Element resultNode; // TODO we suppose that the element is listed as the order from parent // to children, so we reverse it. Perhaps we need a real order process // to ensure the wanted order. Collections.reverse(elemList); boolean renderForRoot; for (Element elem : elemList) { renderForRoot = PSEUDO_ROOT_SELECTOR.equals(selector) || rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER; if (!renderForRoot) { // faked group node will be not applied by renderers(only when the current selector is not the pseudo :root) if (elem.tagName().equals(ExtNodeConstants.GROUP_NODE_TAG) && ExtNodeConstants.GROUP_NODE_ATTR_TYPE_FAKE .equals(elem.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE))) { continue; } } if (elem == target) { delayedElement = elem; continue; } for (Transformer<?> transformer : transformerList) { resultNode = transformer.invoke(elem); elem.before(resultNode); } // for transformer elem.remove(); } // for element // if the root element is one of the process targets, we can not apply // the left renderers to original element because it will be replaced by // a new element even it is not necessary (that is how Transformer // works). if (delayedElement == null) { apply(target, rendererList, renderAction, startIndex + 1, count); } else { if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER && delayedElement instanceof Document) { delayedElement = delayedElement.child(0); } for (Transformer<?> transformer : transformerList) { resultNode = transformer.invoke(delayedElement); delayedElement.before(resultNode); apply(resultNode, rendererList, renderAction, startIndex + 1, count); } // for transformer delayedElement.remove(); } }
From source file:de.geeksfactory.opacclient.apis.Pica.java
protected DetailledItem parse_result(String html) { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url);/* w w w . ja v a2 s.c om*/ DetailledItem result = new DetailledItem(); for (Element a : doc.select("a[href*=PPN")) { Map<String, String> hrefq = getQueryParamsFirst(a.absUrl("href")); String ppn = hrefq.get("PPN"); result.setId(ppn); break; } // GET COVER if (doc.select("td.preslabel:contains(ISBN) + td.presvalue").size() > 0) { Element isbnElement = doc.select("td.preslabel:contains(ISBN) + td.presvalue").first(); String isbn = ""; for (Node child : isbnElement.childNodes()) { if (child instanceof TextNode) { isbn = ((TextNode) child).text().trim(); break; } } result.setCover(ISBNTools.getAmazonCoverURL(isbn, true)); } // GET TITLE AND SUBTITLE String titleAndSubtitle; Element titleAndSubtitleElem = null; String titleRegex = ".*(Titel|Aufsatz|Zeitschrift|Gesamttitel" + "|Title|Article|Periodical|Collective\\stitle" + "|Titre|Article|P.riodique|Titre\\sg.n.ral).*"; String selector = "td.preslabel:matches(" + titleRegex + ") + td.presvalue"; if (doc.select(selector).size() > 0) { titleAndSubtitleElem = doc.select(selector).first(); titleAndSubtitle = titleAndSubtitleElem.text().trim(); int slashPosition = Math.min(titleAndSubtitle.indexOf("/"), titleAndSubtitle.indexOf(":")); String title; if (slashPosition > 0) { title = titleAndSubtitle.substring(0, slashPosition).trim(); String subtitle = titleAndSubtitle.substring(slashPosition + 1).trim(); result.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle)); } else { title = titleAndSubtitle; } result.setTitle(title); } else { result.setTitle(""); } // Details int line = 0; Elements lines = doc.select("td.preslabel + td.presvalue"); if (titleAndSubtitleElem != null) { lines.remove(titleAndSubtitleElem); } for (Element element : lines) { Element titleElem = element.firstElementSibling(); String detail = ""; if (element.select("div").size() > 1 && 
element.select("div").text().equals(element.text())) { boolean first = true; for (Element div : element.select("div")) { if (!div.text().replace("\u00a0", " ").trim().equals("")) { if (!first) { detail += "\n" + div.text().replace("\u00a0", " ").trim(); } else { detail += div.text().replace("\u00a0", " ").trim(); first = false; } } } } else { detail = element.text().replace("\u00a0", " ").trim(); } String title = titleElem.text().replace("\u00a0", " ").trim(); if (element.select("hr").size() > 0) // after the separator we get the copies { break; } if (detail.length() == 0 && title.length() == 0) { line++; continue; } if (title.contains(":")) { title = title.substring(0, title.indexOf(":")); // remove colon } result.addDetail(new Detail(title, detail)); if (element.select("a").size() == 1 && !element.select("a").get(0).text().trim().equals("")) { String url = element.select("a").first().absUrl("href"); if (!url.startsWith(opac_url)) { result.addDetail(new Detail(stringProvider.getString(StringProvider.LINK), url)); } } line++; } line++; // next line after separator // Copies Copy copy = new Copy(); String location = ""; // reservation info will be stored as JSON JSONArray reservationInfo = new JSONArray(); while (line < lines.size()) { Element element = lines.get(line); if (element.select("hr").size() == 0) { Element titleElem = element.firstElementSibling(); String detail = element.text().trim(); String title = titleElem.text().replace("\u00a0", " ").trim(); if (detail.length() == 0 && title.length() == 0) { line++; continue; } if (title.contains("Standort") || title.contains("Vorhanden in") || title.contains("Location")) { location += detail; } else if (title.contains("Sonderstandort")) { location += " - " + detail; } else if (title.contains("Systemstelle") || title.contains("Subject")) { copy.setDepartment(detail); } else if (title.contains("Fachnummer") || title.contains("locationnumber")) { copy.setLocation(detail); } else if (title.contains("Signatur") || 
title.contains("Shelf mark")) { copy.setShelfmark(detail); } else if (title.contains("Anmerkung")) { location += " (" + detail + ")"; } else if (title.contains("Link")) { result.addDetail(new Detail(title.replace(":", "").trim(), detail)); } else if (title.contains("Status") || title.contains("Ausleihinfo") || title.contains("Ausleihstatus") || title.contains("Request info")) { // Find return date Pattern pattern = Pattern.compile("(till|bis) (\\d{2}-\\d{2}-\\d{4})"); Matcher matcher = pattern.matcher(detail); if (matcher.find()) { DateTimeFormatter fmt = DateTimeFormat.forPattern("dd-MM-yyyy").withLocale(Locale.GERMAN); try { copy.setStatus(detail.substring(0, matcher.start() - 1).trim()); copy.setReturnDate(fmt.parseLocalDate(matcher.group(2))); } catch (IllegalArgumentException e) { e.printStackTrace(); copy.setStatus(detail); } } else { copy.setStatus(detail); } // Get reservation info if (element.select("a:has(img[src*=inline_arrow])").size() > 0) { Element a = element.select("a:has(img[src*=inline_arrow])").first(); boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*"); JSONObject reservation = new JSONObject(); try { reservation.put("multi", multipleCopies); reservation.put("link", _extract_url(a.absUrl("href"))); reservation.put("desc", location); reservationInfo.put(reservation); } catch (JSONException e1) { e1.printStackTrace(); } result.setReservable(true); } } } else { copy.setBranch(location); result.addCopy(copy); location = ""; copy = new Copy(); } line++; } if (copy.notEmpty()) { copy.setBranch(location); result.addCopy(copy); } if (reservationInfo.length() == 0) { // No reservation info found yet, because we didn't find any copies. // If there is a reservation link somewhere in the rows we interpreted // as details, we still want to use it. 
if (doc.select("td a:has(img[src*=inline_arrow])").size() > 0) { Element a = doc.select("td a:has(img[src*=inline_arrow])").first(); boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*"); JSONObject reservation = new JSONObject(); try { reservation.put("multi", multipleCopies); reservation.put("link", _extract_url(a.attr("href"))); reservation.put("desc", location); reservationInfo.put(reservation); } catch (JSONException e1) { e1.printStackTrace(); } result.setReservable(true); } } result.setReservation_info(reservationInfo.toString()); // Volumes if (doc.select("a[href^=FAM?PPN=]").size() > 0) { String href = doc.select("a[href^=FAM?PPN=]").attr("href"); String ppn = getQueryParamsFirst(href).get("PPN"); Map<String, String> data = new HashMap<>(); data.put("ppn", ppn); result.setVolumesearch(data); } return result; }
From source file:de.geeksfactory.opacclient.apis.Pica.java
/**
 * Builds the list of search fields by scraping the catalogue's advanced search form.
 *
 * <p>The form is fetched from
 * {@code <opac_url>/LNG=<lang>/DB=<db>/ADVANCED_SEARCHFILTER} and translated as follows:
 * <ul>
 * <li>every option of {@code select[name=IKT0]} becomes a text field whose data is
 * {@code {"ADI": false}}; a bracketed or parenthesised 2-3 letter code in the option label
 * (optionally prefixed with {@code X} and/or followed by {@code :}) is removed from the
 * display name and stored upper-cased under the data key {@code "meaning"};</li>
 * <li>{@code select[name=SRT]} becomes a dropdown with the id {@code "SRT"};</li>
 * <li>text inputs and selects whose name starts with {@code ADI} become text fields
 * (data {@code {"ADI": true}}) and dropdowns respectively;</li>
 * <li>{@code input[name=FUZZY]} becomes a checkbox with the id {@code "FUZZY"};</li>
 * <li>all {@code input[name=ADI_MAT]} elements are merged into one dropdown with the id
 * {@code "ADI_MAT"}, preceded by an empty-valued "Alle" entry.</li>
 * </ul>
 *
 * @return the search fields in the order listed above
 * @throws IOException if fetching the form fails
 * @throws JSONException if the field data cannot be built
 */
@Override
public List<SearchField> getSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    String html = httpGet(opac_url + "/LNG=" + getLang() + "/DB=" + db + "/ADVANCED_SEARCHFILTER",
            getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    List<SearchField> fields = new ArrayList<>();

    // Compiled once up front instead of once per <option>.
    Pattern meaningPattern = Pattern.compile("\\[X?[A-Za-z]{2,3}:?\\]|\\(X?[A-Za-z]{2,3}:?\\)");
    for (Element option : doc.select("select[name=IKT0] option")) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(option.text());
        field.setId(option.attr("value"));
        field.setHint("");
        field.setData(new JSONObject("{\"ADI\": false}"));

        Matcher matcher = meaningPattern.matcher(field.getDisplayName());
        if (matcher.find()) {
            // Locale.ROOT keeps the code stable regardless of the device locale
            // (the default-locale variant maps "i" to a dotted capital I in Turkish).
            field.getData().put("meaning",
                    matcher.group().replace(":", "").toUpperCase(java.util.Locale.ROOT));
            field.setDisplayName(matcher.replaceFirst("").trim());
        }
        fields.add(field);
    }

    Elements sort = doc.select("select[name=SRT]");
    if (sort.size() > 0) {
        DropdownSearchField field = new DropdownSearchField();
        // first() returns null when no ".longval" label exists; fall back to an empty
        // name, which is what the ADI fields below get in the same situation.
        Element sortLabel = sort.first().parent().parent().select(".longval").first();
        field.setDisplayName(sortLabel != null ? sortLabel.text() : "");
        field.setId("SRT");
        for (Element option : sort.select("option")) {
            field.addDropdownValue(option.attr("value"), option.text());
        }
        fields.add(field);
    }

    for (Element input : doc.select("input[type=text][name^=ADI]")) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(input.parent().parent().select(".longkey").text());
        field.setId(input.attr("name"));
        field.setHint(input.parent().select("span").text());
        field.setData(new JSONObject("{\"ADI\": true}"));
        fields.add(field);
    }

    for (Element dropdown : doc.select("select[name^=ADI]")) {
        DropdownSearchField field = new DropdownSearchField();
        field.setDisplayName(dropdown.parent().parent().select(".longkey").text());
        field.setId(dropdown.attr("name"));
        for (Element option : dropdown.select("option")) {
            field.addDropdownValue(option.attr("value"), option.text());
        }
        fields.add(field);
    }

    Elements fuzzy = doc.select("input[name=FUZZY]");
    if (fuzzy.size() > 0) {
        CheckboxSearchField field = new CheckboxSearchField();
        // Same null guard as for the sort label above.
        Element fuzzyLabel = fuzzy.first().parent().parent().select(".longkey").first();
        field.setDisplayName(fuzzyLabel != null ? fuzzyLabel.text() : "");
        field.setId("FUZZY");
        fields.add(field);
    }

    Elements mediatypes = doc.select("input[name=ADI_MAT]");
    if (mediatypes.size() > 0) {
        DropdownSearchField field = new DropdownSearchField();
        field.setDisplayName("Materialart");
        field.setId("ADI_MAT");
        field.addDropdownValue("", "Alle");
        for (Element mt : mediatypes) {
            String value = mt.attr("value");
            // The label is read from the element following the input's parent.
            // If that sibling is missing, show the raw value rather than failing.
            Element labelCell = mt.parent().nextElementSibling();
            String label = labelCell != null ? labelCell.text().replace("\u00a0", "") : value;
            field.addDropdownValue(value, label);
        }
        fields.add(field);
    }

    return fields;
}
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
@Override public List<SearchField> getSearchFields() throws IOException, JSONException { if (!initialised) { start();//from w w w . ja v a2 s . com } List<SearchField> fields = new ArrayList<>(); // Read branches and media types List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("link_profis.x", "0")); nameValuePairs.add(new BasicNameValuePair("link_profis.y", "1")); String html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); Document doc = Jsoup.parse(html); Elements fieldElems = doc.select(".suchfeldinhalt"); for (Element fieldElem : fieldElems) { String name = fieldElem.select(".suchfeld_inhalt_titel label").text(); String hint = ""; if (fieldElem.select(".suchfeld_inhalt_input").size() > 0) { List<TextNode> textNodes = fieldElem.select(".suchfeld_inhalt_input").first().textNodes(); if (textNodes.size() > 0) { for (TextNode node : textNodes) { String text = node.getWholeText().replace("\n", ""); if (!text.equals("")) { hint = node.getWholeText().replace("\n", ""); break; } } } } Elements inputs = fieldElem .select(".suchfeld_inhalt_input input[type=text], " + ".suchfeld_inhalt_input select"); if (inputs.size() == 1) { fields.add(createSearchField(name, hint, inputs.get(0))); } else if (inputs.size() == 2 && inputs.select("input[type=text]").size() == 2) { // Two text fields, e.g. year from/to or two keywords fields.add(createSearchField(name, hint, inputs.get(0))); TextSearchField secondField = (TextSearchField) createSearchField(name, hint, inputs.get(1)); secondField.setHalfWidth(true); fields.add(secondField); } else if (inputs.size() == 2 && inputs.get(0).tagName().equals("select") && inputs.get(1).tagName().equals("input") && inputs.get(0).attr("name").equals("feld1")) { // A dropdown to select from different search field types. // Break it down into single text fields. 
for (Element option : inputs.get(0).select("option")) { TextSearchField field = new TextSearchField(); field.setHint(hint); field.setDisplayName(option.text()); field.setId(inputs.get(1).attr("name") + "$" + option.attr("value")); JSONObject data = new JSONObject(); JSONObject params = new JSONObject(); params.put(inputs.get(0).attr("name"), option.attr("value")); data.put("additional_params", params); field.setData(data); fields.add(field); } } } DropdownSearchField orderField = new DropdownSearchField("orderselect", stringProvider.getString(StringProvider.ORDER), false, null); orderField.addDropdownValue("1", stringProvider.getString(StringProvider.ORDER_DEFAULT)); orderField.addDropdownValue("2:desc", stringProvider.getString(StringProvider.ORDER_YEAR_DESC)); orderField.addDropdownValue("2:asc", stringProvider.getString(StringProvider.ORDER_YEAR_ASC)); orderField.addDropdownValue("3:desc", stringProvider.getString(StringProvider.ORDER_CATEGORY_DESC)); orderField.addDropdownValue("3:asc", stringProvider.getString(StringProvider.ORDER_CATEGORY_ASC)); orderField.setMeaning(Meaning.ORDER); fields.add(orderField); return fields; }
From source file:de.geeksfactory.opacclient.apis.TouchPoint.java
private void parseDropdown(Element dropdownElement, List<SearchField> fields) { Elements options = dropdownElement.select("option"); DropdownSearchField dropdown = new DropdownSearchField(); dropdown.setId(dropdownElement.attr("name")); // Some fields make no sense or are not supported in the app if (dropdown.getId().equals("numberOfHits") || dropdown.getId().equals("timeOut") || dropdown.getId().equals("rememberList")) { return;/*from ww w . j av a2 s . com*/ } for (Element option : options) { dropdown.addDropdownValue(option.attr("value"), option.text()); } dropdown.setDisplayName(dropdownElement.parent().select("label").text()); fields.add(dropdown); }