List of usage examples for org.jsoup.nodes Element attr
public String attr(String attributeKey)
From source file:de.geeksfactory.opacclient.apis.Heidi.java
@Override public DetailledItem getResultById(String id, final String homebranch) throws IOException { if (sessid == null) { start();// w ww .j av a 2 s . c om } // Homebranch if (homebranch != null && !"".equals(homebranch)) { cookieStore.addCookie(new BasicClientCookie("zweig", homebranch)); } String html = httpGet(opac_url + "/titel.cgi?katkey=" + id + "&sess=" + sessid, ENCODING, false, cookieStore); Document doc = Jsoup.parse(html); DetailledItem item = new DetailledItem(); item.setId(id); Elements table = doc.select(".titelsatz tr"); for (Element tr : table) { if (tr.select("th").size() == 0 || tr.select("td").size() == 0) { continue; } String d = tr.select("th").first().text(); String c = tr.select("td").first().text(); if (d.equals("Titel:")) { item.setTitle(c); } else if ((d.contains("URL") || d.contains("Link")) && tr.select("td a").size() > 0) { item.addDetail(new Detail(d, tr.select("td a").first().attr("href"))); } else { item.addDetail(new Detail(d, c)); } } if (doc.select(".ex table tr").size() > 0) { table = doc.select(".ex table tr"); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); for (Element tr : table) { if (tr.hasClass("exueber") || tr.select(".exsig").size() == 0 || tr.select(".exso").size() == 0 || tr.select(".exstatus").size() == 0) { continue; } Copy copy = new Copy(); copy.setShelfmark(tr.select(".exsig").first().text()); copy.setBranch(tr.select(".exso").first().text()); String status = tr.select(".exstatus").first().text(); if (status.contains("entliehen bis")) { copy.setReturnDate(fmt.parseLocalDate(status.replaceAll("entliehen bis ([0-9.]+) .*", "$1"))); copy.setReservations(status.replaceAll(".*\\(.*Vormerkungen: ([0-9]+)\\)", "$1")); copy.setStatus("entliehen"); } else { copy.setStatus(status); } item.addCopy(copy); } } for (Element a : doc.select(".status1 a")) { if (a.attr("href").contains("bestellung.cgi")) { item.setReservable(true); item.setReservation_info(id); break; } } for (Element 
a : doc.select(".titelsatz a")) { if (a.text().trim().matches("B.+nde")) { Map<String, String> volumesearch = new HashMap<>(); volumesearch.put("query", getQueryParamsFirst(a.attr("href")).get("query")); item.setVolumesearch(volumesearch); } } return item; }
From source file:de.geeksfactory.opacclient.apis.Zones22.java
/**
 * Performs one step of the reservation dialogue for the given item.
 *
 * <p>The reservation page is loaded from the item's reservation info. If it asks for a PIN
 * ("Geheimnummer"), the main form is re-submitted with the account credentials. Depending on
 * {@code useraction} and on the page content the method then either submits the chosen branch,
 * asks the user to confirm a fee ("Entgelt"), asks the user to pick a branch, or reports an
 * error.
 *
 * @param item       the item to reserve; its reservation info is used as the relative URL
 * @param acc        the account whose name and password are sent as {@code BRWR} / {@code PIN}
 * @param useraction {@code 0} for the initial call, or
 *                   {@link ReservationResult#ACTION_BRANCH} after a branch was selected
 * @param selection  the selected branch value, used only for {@code ACTION_BRANCH}
 * @return the result of this step
 * @throws IOException if an HTTP request fails
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    String reservation_info = item.getReservation_info();
    String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding());
    Document doc = Jsoup.parse(html);

    if (html.contains("Geheimnummer")) {
        // Login required: resend the form with the account credentials filled in.
        List<NameValuePair> params = mainFormParamsExcept(doc, "BRWR", "PIN");
        params.add(new BasicNameValuePair("BRWR", acc.getName()));
        params.add(new BasicNameValuePair("PIN", acc.getPassword()));
        html = submitMainForm(doc, params);
        doc = Jsoup.parse(html);
    }

    if (useraction == ReservationResult.ACTION_BRANCH) {
        List<NameValuePair> params = mainFormParamsExcept(doc, "Confirm");
        params.add(new BasicNameValuePair("MakeResTypeDef.Reservation.RecipientLocn", selection));
        params.add(new BasicNameValuePair("Confirm", "1"));
        // NOTE(review): the response of this request is not inspected; success is assumed.
        submitMainForm(doc, params);
        return new ReservationResult(MultiStepResult.Status.OK);
    }

    if (useraction == 0) {
        ReservationResult res = null;
        // first() returns null when the page has no #MainForm; skip the fee check in that case
        // instead of failing with a NullPointerException.
        Element mainForm = doc.select("#MainForm").first();
        if (mainForm != null) {
            for (Node n : mainForm.childNodes()) {
                if (n instanceof TextNode && ((TextNode) n).text().contains("Entgelt")) {
                    String feeText = ((TextNode) n).text().trim();
                    res = new ReservationResult(ReservationResult.Status.CONFIRMATION_NEEDED);
                    List<String[]> details = new ArrayList<String[]>();
                    details.add(new String[] { feeText });
                    res.setDetails(details);
                    res.setMessage(feeText);
                    res.setActionIdentifier(MultiStepResult.ACTION_CONFIRMATION);
                }
            }
        }
        if (res != null) {
            return res;
        }
    }

    if (doc.select("#MainForm select").size() > 0) {
        ReservationResult res = new ReservationResult(ReservationResult.Status.SELECTION_NEEDED);
        Map<String, String> sel = new HashMap<String, String>();
        for (Element opt : doc.select("#MainForm select option")) {
            sel.put(opt.attr("value"), opt.text().trim());
        }
        res.setSelection(sel);
        // "Bitte Zweigstelle auswählen" - written with an escape so the umlaut survives any
        // source file encoding.
        res.setMessage("Bitte Zweigstelle ausw\u00e4hlen");
        res.setActionIdentifier(ReservationResult.ACTION_BRANCH);
        return res;
    }

    return new ReservationResult(ReservationResult.Status.ERROR);
}

/** Collects name/value pairs of all inputs in {@code #MainForm} whose name is not excluded. */
private List<NameValuePair> mainFormParamsExcept(Document doc, String... excludedNames) {
    List<NameValuePair> params = new ArrayList<NameValuePair>();
    for (Element input : doc.select("#MainForm input")) {
        String name = input.attr("name");
        boolean excluded = false;
        for (String excludedName : excludedNames) {
            if (name.equals(excludedName)) {
                excluded = true;
                break;
            }
        }
        if (!excluded) {
            params.add(new BasicNameValuePair(name, input.attr("value")));
        }
    }
    return params;
}

/** Sends {@code params} via GET to the action URL of {@code #MainForm} and returns the response. */
private String submitMainForm(Document doc, List<NameValuePair> params) throws IOException {
    return httpGet(opac_url + "/" + doc.select("#MainForm").attr("action") + "?"
            + URLEncodedUtils.format(params, getDefaultEncoding()), getDefaultEncoding());
}
From source file:de.geeksfactory.opacclient.apis.Zones22.java
@Override public List<SearchField> getSearchFields() throws ClientProtocolException, IOException { List<SearchField> fields = new ArrayList<SearchField>(); String html = httpGet(/* w w w . ja va2 s . co m*/ opac_url + "/APS_ZONES?fn=AdvancedSearch&Style=Portal3&SubStyle=&Lang=GER&ResponseEncoding=utf-8", getDefaultEncoding()); Document doc = Jsoup.parse(html); // Textfelder auslesen Elements txt_opts = doc.select("#formSelectTerm_1 option"); for (Element opt : txt_opts) { TextSearchField field = new TextSearchField(); field.setId(opt.attr("value")); field.setHint(""); field.setDisplayName(opt.text()); fields.add(field); } // Zweigstellen auslesen Elements zst_opts = doc.select(".TabRechAv .limitChoice label"); if (zst_opts.size() > 0) { DropdownSearchField brDropdown = new DropdownSearchField(); brDropdown.setId(zst_opts.get(0).parent().select("input").attr("name")); brDropdown.setDisplayName("Zweigstelle"); List<Map<String, String>> brOptions = new ArrayList<Map<String, String>>(); Map<String, String> all = new HashMap<String, String>(); all.put("key", ""); all.put("value", "Alle"); brOptions.add(all); for (Element opt : zst_opts) { Map<String, String> value = new HashMap<String, String>(); value.put("key", opt.attr("for")); value.put("value", opt.text().trim()); brOptions.add(value); } brDropdown.setDropdownValues(brOptions); fields.add(brDropdown); } return fields; }
From source file:info.smartkit.hairy_batman.query.SogouSearchQuery.java
public void parseWxOpenId() { Document doc;// www . j av a 2s . co m try { // need http protocol // doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE+ wxFoo.getSubscribeId()).get(); doc = Jsoup.connect("http://weixin.sogou.com/weixin?type=1&query=" + wxFoo.getSubscribeId() + "&fr=sgsearch&ie=utf8&_ast=1423915648&_asf=null&w=01019900&cid=null&sut=19381").get(); LOG.debug("openID html INFO:" + doc.html()); // get page title String title = doc.title(); LOG.debug("title : " + title); // get all "?:" value of html <span> //Elements openIdLink = doc.select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELEMENTS).select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELE_IDENTITY); Elements openIdLink = doc.getElementsByClass("wx-rb"); Element a = null; String openIdLinkHref = ""; if (openIdLink != null && openIdLink.size() > 0) { Iterator<Element> itea = openIdLink.iterator(); while (itea.hasNext()) { a = itea.next(); LOG.debug("openID html INFO:" + a.html()); if (a.getElementsByTag("em").html().indexOf(wxFoo.getSubscribeId()) != -1) { break; } } } if (a != null) { openIdLinkHref = a.attr("href"); } LOG.debug("openIdLinkHref:" + openIdLinkHref); // FIXME:???? if (this.wxFoo.getOpenId() == null && openIdLinkHref.length() > 0) { this.wxFoo.setOpenId(openIdLinkHref.split(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_KEYWORDS)[1]); LOG.info("saved wxOpenId value: " + this.wxFoo.getOpenId()); GlobalVariables.wxFooListWithOpenId.add(this.wxFoo); // File reporting new FileReporter(GlobalConsts.REPORT_FILE_OUTPUT_OPENID, GlobalVariables.wxFooListWithOpenId, FileReporter.REPORTER_TYPE.R_T_OPENID, FileReporter.REPORTER_FILE_TYPE.EXCEL).write(); // Then,OpenID JSON site parse if (this.wxFoo.getOpenId() != null) { // Save openId to DB. 
try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), this.wxFoo.getOpenId() }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } else { LOG.warn("SogouSearchQuery getOpenId Failure! site info:" + wxFoo.getCode()); // TODO write those info to File or DB for collect which // agency not open weixin service // Save openId to DB. try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), "" }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); LOG.warn("Can not get subsriber info: " + this.wxFoo.getCode()); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } } } catch (IOException e) { // e.printStackTrace(); LOG.error(e.toString()); } }
From source file:org.confab.PhpBB3Parser.java
/** * Parses each topic for a particular forum. * @param forum Document of html containing topics * @param parent Forum the threads belong to * @return List of ForumThread objects *///from w w w. jav a 2 s . co m public List<ForumThread> parseForumThreads(Document forum, Forum parent) { Utilities.debug("parseForumThreads"); List<ForumThread> ret = new ArrayList<ForumThread>(); // Get topic table Elements thread_table_tds = forum.select("tbody[id*=threadbits_forum_] td"); if (thread_table_tds.isEmpty()) { Utilities.debug("It seems " + parent.url + " has no topics."); return ret; } // Get any stickies Elements stickies = thread_table_tds.select("td:contains(Sticky:) a[id*=thread_title_]"); // Get all topics Elements els_a = thread_table_tds.select("a[id*=thread_title_]"); assert !els_a.isEmpty(); // Loop topics and grab info about each for (Element el_a : els_a) { ForumThread new_topic = new ForumThread(parent); // Get topic new_topic.title = el_a.text(); assert new_topic.title != null; Utilities.debug("new_topic.title: " + new_topic.title); // Check if sticky if (stickies.html().contains(new_topic.title)) { new_topic.isSticky = true; Utilities.debug("new_topic.isSticky: " + new_topic.isSticky); } // Get URL new_topic.url = el_a.attr("href"); assert new_topic.url != null; Utilities.debug("new_topic.url:" + new_topic.url); ret.add(new_topic); } Utilities.debug("end printForumThreads"); return ret; }
From source file:cw.kop.autobackground.files.DownloadThread.java
/**
 * Collects candidate image URLs from all elements matching {@code tag}.
 *
 * <p>The URL is read from attribute {@code attr}; values not containing "http" get the prefix
 * "http:". Elements that declare a width are dropped when their width or height is below the
 * configured minimum. URLs containing an image extension are added as they are. With forced
 * download enabled, other URLs that look like a host address are added in three variants.
 *
 * @param doc  the parsed page
 * @param tag  selector of the elements to inspect
 * @param attr name of the attribute holding the URL
 * @return the set of collected URLs, possibly empty
 */
private Set<String> compileImageLinks(Document doc, String tag, String attr) {
    Set<String> collected = new HashSet<>();

    for (Element candidate : doc.select(tag)) {
        String address = candidate.attr(attr);
        if (!address.contains("http")) {
            address = "http:" + address;
        }

        if (isBelowMinimumSize(candidate)) {
            continue;
        }

        boolean hasImageExtension =
                address.contains(".png") || address.contains(".jpg") || address.contains(".jpeg");
        if (hasImageExtension) {
            collected.add(address);
            continue;
        }

        if (AppSettings.forceDownload() && address.length() > 5
                && (address.contains(".com") || address.contains(".org") || address.contains(".net"))) {
            collected.add(address + ".png");
            collected.add(address + ".jpg");
            collected.add(address);
        }
    }

    return collected;
}

/**
 * Tells whether the element declares dimensions smaller than the configured minimum.
 * Elements without a width, or with non-numeric dimensions, are never considered too small.
 */
private boolean isBelowMinimumSize(Element image) {
    String declaredWidth = image.attr("width");
    if (declaredWidth == null || declaredWidth.equals("")) {
        return false;
    }
    try {
        return Integer.parseInt(declaredWidth) < AppSettings.getImageWidth()
                || Integer.parseInt(image.attr("height")) < AppSettings.getImageHeight();
    } catch (NumberFormatException ignored) {
        // Dimensions that are not plain integers cannot be checked; keep the element.
        return false;
    }
}
From source file:us.colloquy.index.IndexHandler.java
public void getURIForAllDiaries(List<DocumentPointer> documentPointers, Path pathToLetters) { List<Path> results = new ArrayList<>(); int maxDepth = 6; try (Stream<Path> stream = Files.find(pathToLetters, maxDepth, (path, attr) -> { return String.valueOf(path).endsWith(".ncx"); })) {/*from ww w . ja v a2s.co m*/ stream.forEach(results::add); } catch (IOException e) { e.printStackTrace(); } System.out.println("files: " + results.size()); try { for (Path res : results) { Path parent = res.getParent(); // System.out.println("---------------------------------------------"); // System.out.println(parent.toString()); //use jsoup to list all files that contain something useful Document doc = Jsoup.parse(res.toFile(), "UTF-8"); String title = ""; for (Element element : doc.getElementsByTag("docTitle")) { //Letter letter = new Letter(); // StringBuilder content = new StringBuilder(); for (Element child : element.children()) { title = child.text(); // System.out.println("Title: " + title); } } // System.out.println("========================== " + res.toString() + " =========================="); boolean startPrinting = false; boolean newFile = true; for (Element element : doc.getElementsByTag("navPoint")) { //get nav label and content Element navLabelElement = element.select("navLabel").first(); Element srsElement = element.select("content").first(); String navLabel = ""; String srs = ""; if (navLabelElement != null) { navLabel = navLabelElement.text().replaceAll("\\*", "").trim(); } if (srsElement != null) { srs = srsElement.attr("src"); } if ("??".matches(navLabel)) { startPrinting = false; // System.out.println("----------------- end of file pointer ---------------"); } if (StringUtils.isNotEmpty(navLabel) && navLabel.matches("??.*|?? ?.*") && newFile) { newFile = false; startPrinting = true; } if (startPrinting && !navLabel .matches("(|??? 
??)")) { // System.out.println("----------------- file pointer ---------------"); // System.out.println(navLabel + "\t" + srs); DocumentPointer documentPointer = new DocumentPointer( parent.toString() + File.separator + srs.replaceAll("#.*", ""), title); documentPointers.add(documentPointer); } } // System.out.println("========================== END OF FILE =========================="); } } catch (Exception e) { e.printStackTrace(); } System.out.println("Size: " + documentPointers.size()); // for (DocumentPointer pointer : documentPointers) // { //parse and // System.out.println(pointer.getSourse() + "\t" + pointer.getUri()); }
From source file:se.vgregion.portal.iframe.controller.CSViewController.java
private String encodeRaindancePassword(String uid, PortletConfig portletConfig) { try {// w ww . j a v a 2 s.c om final int timeout = 5000; Document doc = new JSoupHelper().invoke(new URL(portletConfig.getSrc()), timeout); Element dynamicValue = findButtonWithIdWhichStartsWith(doc, "loginForm:j_idt"); if (dynamicValue == null) { // todo Want to send an email to notify the LOGGER.error("No element found which starts with \"loginForm:j_idt\".", new RuntimeException()); return ""; } String onClick = dynamicValue.attr("onclick"); final int i = 3; String sessionKey = onClick.split("'")[i]; UserSiteCredential siteCredential = credentialService.getUserSiteCredential(uid, portletConfig.getSiteKey()); return encodeRaindance(siteCredential.getSitePassword(), sessionKey); } catch (Exception e) { e.printStackTrace(); return ""; } }
From source file:de.geeksfactory.opacclient.apis.Heidi.java
@Override public List<SearchField> getSearchFields() throws IOException, OpacErrorException, JSONException { String html = httpGet(opac_url + "/search.cgi?art=f", ENCODING, false, cookieStore); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url);// w ww .j ava 2 s .c o m List<SearchField> fields = new ArrayList<>(); Elements options = doc.select("select[name=kat1] option"); for (Element option : options) { TextSearchField field = new TextSearchField(); field.setDisplayName(option.text()); field.setId(option.attr("value")); field.setHint(""); fields.add(field); } DropdownSearchField field = new DropdownSearchField(); Elements zst_opts = doc.select("#teilk2 option"); for (int i = 0; i < zst_opts.size(); i++) { Element opt = zst_opts.get(i); if (!opt.val().equals("")) { field.addDropdownValue(opt.val(), opt.text()); } } field.setDisplayName("Einrichtung"); field.setId("f[teil2]"); field.setVisible(true); field.setMeaning(SearchField.Meaning.BRANCH); fields.add(field); try { field = new DropdownSearchField(); Document doc2 = Jsoup .parse(httpGet(opac_url + "/zweigstelle.cgi?sess=" + sessid, ENCODING, false, cookieStore)); Elements home_opts = doc2.select("#zweig option"); for (int i = 0; i < home_opts.size(); i++) { Element opt = home_opts.get(i); if (!opt.val().equals("")) { Map<String, String> option = new HashMap<>(); option.put("key", opt.val()); option.put("value", opt.text()); field.addDropdownValue(opt.val(), opt.text()); } } field.setDisplayName("Leihstelle"); field.setId("_heidi_branch"); field.setVisible(true); field.setMeaning(SearchField.Meaning.HOME_BRANCH); fields.add(field); } catch (IOException e) { e.printStackTrace(); } TextSearchField pagefield = new TextSearchField(); pagefield.setId("_heidi_page"); pagefield.setVisible(false); pagefield.setDisplayName("Seite"); pagefield.setHint(""); fields.add(pagefield); return fields; }
From source file:de.geeksfactory.opacclient.apis.Pica.java
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException { Document doc = Jsoup.parse(html); updateSearchSetValue(doc);//from ww w .jav a2s .c o m if (doc.select(".error").size() > 0) { String error = doc.select(".error").first().text().trim(); if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found") || error.equals("Er is niets gevonden.") || error.equals("Rien n'a t trouv.")) { // nothing found return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1); } else { // error throw new OpacErrorException(error); } } reusehtml = html; int results_total; String resultnumstr = doc.select(".pages").first().text(); Pattern p = Pattern.compile("[0-9]+$"); Matcher m = p.matcher(resultnumstr); if (m.find()) { resultnumstr = m.group(); } if (resultnumstr.contains("(")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1")); } else if (resultnumstr.contains(": ")) { results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1")); } else { results_total = Integer.parseInt(resultnumstr); } List<SearchResult> results = new ArrayList<>(); if (results_total == 1) { // Only one result DetailledItem singleResult = parse_result(html); SearchResult sr = new SearchResult(); sr.setType(getMediaTypeInSingleResult(html)); sr.setInnerhtml( "<b>" + singleResult.getTitle() + "</b><br>" + singleResult.getDetails().get(0).getContent()); results.add(sr); } Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]"); // identifier = null; Elements links = doc.select("table[summary=hitlist] a"); boolean haslink = false; for (int i = 0; i < links.size(); i++) { Element node = links.get(i); if (node.hasAttr("href") & node.attr("href").contains("SHW?") && !haslink) { haslink = true; try { List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href")), getDefaultEncoding()); for (NameValuePair nv : anyurl) { if (nv.getName().equals("identifier")) 
{ // identifier = nv.getValue(); break; } } } catch (Exception e) { e.printStackTrace(); } } } for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); if (tr.select("td.hit img").size() > 0) { String[] fparts = tr.select("td img").get(0).attr("src").split("/"); String fname = fparts[fparts.length - 1]; if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", ""))); } } else { sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", ""))); } } Element middlething = tr.child(2); List<Node> children = middlething.childNodes(); int childrennum = children.size(); List<String[]> strings = new ArrayList<>(); for (int ch = 0; ch < childrennum; ch++) { Node node = children.get(ch); if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (text.length() > 3) { strings.add(new String[] { "text", "", text }); } } else if (node instanceof Element) { List<Node> subchildren = node.childNodes(); for (int j = 0; j < subchildren.size(); j++) { Node subnode = subchildren.get(j); if (subnode instanceof TextNode) { String text = ((TextNode) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") }); } } else if (subnode instanceof Element) { String text = ((Element) subnode).text().trim(); if (text.length() > 3) { strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") }); } } } } } StringBuilder description = new StringBuilder(); int k = 0; for (String[] part : strings) { if (part[0].equals("a") && k == 0) { 
description.append("<b>").append(part[2]).append("</b>"); } else if (k < 3) { description.append("<br />").append(part[2]); } k++; } sr.setInnerhtml(description.toString()); sr.setNr(10 * (page - 1) + i); sr.setId(null); results.add(sr); } resultcount = results.size(); return new SearchRequestResult(results, results_total, page); }