List of usage examples for org.jsoup.nodes Document select
public Elements select(String cssQuery)
From source file:de.geeksfactory.opacclient.apis.Littera.java
/**
 * Builds the free-text search field from the simple search page and appends it to
 * {@code fields}.
 *
 * <p>The page is fetched from {@code getApiUrl() + "&mode=s"}. The first {@code .simple_search}
 * element supplies the display name (text of its first {@code h4}) and the field id
 * ({@code name} attribute of its {@code #keyboard} element). The id is also stored in the
 * field's JSON data under the key {@code "meaning"}.
 *
 * @param fields list that receives the newly created field
 * @throws IOException   declared by the signature; presumably raised by the HTTP fetch
 * @throws JSONException declared by the signature; presumably raised when filling the JSON data
 */
protected void addSimpleSearchField(List<SearchField> fields) throws IOException, JSONException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=s", getDefaultEncoding()));
    final Element searchBox = page.select(".simple_search").first();

    final TextSearchField freeText = new TextSearchField();
    freeText.setFreeSearch(true);

    final String label = searchBox.select("h4").first().text();
    freeText.setDisplayName(label);

    final String inputName = searchBox.select("#keyboard").first().attr("name");
    freeText.setId(inputName);

    freeText.setHint("");
    freeText.setData(new JSONObject());
    freeText.getData().put("meaning", freeText.getId());

    fields.add(freeText);
}
From source file:de.geeksfactory.opacclient.apis.Littera.java
/**
 * Determines which languages the catalogue offers by scanning the main menu markup.
 *
 * <p>A language from {@code LANGUAGE_CODES} counts as supported when the HTML of the first
 * {@code .mainmenu} element contains either {@code lang=<code>} or {@code /<code>/}.
 *
 * @return the keys of {@code LANGUAGE_CODES} whose code was found in the menu
 * @throws IOException declared by the signature; presumably raised by the HTTP fetch
 */
@Override
public Set<String> getSupportedLanguages() throws IOException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()));
    final String menu = page.select(".mainmenu").first().html();

    final Set<String> supported = new HashSet<>();
    for (final Map.Entry<String, String> entry : LANGUAGE_CODES.entrySet()) {
        final String code = entry.getValue();
        // language switch link
        final boolean inSwitchLink = menu.contains("lang=" + code);
        // help link (only consulted when no switch link matched)
        if (inSwitchLink || menu.contains("/" + code + "/")) {
            supported.add(entry.getKey());
        }
    }
    return supported;
}
From source file:de.geeksfactory.opacclient.apis.WebOpacAt.java
/**
 * Loads the detail page of a single record and converts it into a {@link DetailledItem}.
 *
 * <p>The page is fetched from {@code getApiUrl() + "&view=detail&id=" + id}. Title, cover and
 * media type come from the {@code .detailData} section; the single {@link Copy} is filled from
 * the first table inside {@code .bibliothek}. Every row of {@code table.titel} becomes a
 * {@link Detail}; a row whose label cell is blank is treated as a continuation of the previous
 * detail and appended to it on a new line.
 *
 * @param id         record id, appended to the request URL as given
 * @param homebranch not used by this method
 * @return the populated item, always carrying exactly one copy
 * @throws IOException        declared by the signature; presumably raised by the HTTP fetch
 * @throws OpacErrorException declared by the signature
 */
@Override
public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException {
    if (!initialised) {
        start();
    }

    final String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);
    final Element detailData = doc.select(".detailData").first();
    final Element detailTable = detailData.select("table.titel").first();
    final Element availabilityTable = doc.select(".bibliothek table").first();

    final DetailledItem result = new DetailledItem();
    final Copy copy = new Copy();
    result.addCopy(copy);
    result.setId(id);
    result.setCover(getCover(doc));
    result.setTitle(detailData.select("h3").first().text());
    result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media")));

    // The German label is "Verfügbar"; the umlaut is written as a Unicode escape so the literal
    // survives any source-file encoding. (The previous literal had lost the character.)
    copy.setStatus(getCellContent(availabilityTable, "Verf\u00fcgbar|Available"));
    copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent")));
    copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations"));

    for (final Element tr : detailTable.select("tr")) {
        // Rows without both a label and a content cell cannot be mapped to a detail;
        // skipping them avoids an IndexOutOfBoundsException from child(1).
        if (tr.children().size() < 2) {
            continue;
        }
        final String desc = tr.child(0).text();
        final String content = tr.child(1).text();
        if (desc != null && !desc.trim().isEmpty()) {
            result.addDetail(new Detail(desc, content));
        } else if (!result.getDetails().isEmpty()) {
            // Blank label: continuation line of the previous detail.
            final Detail lastDetail = result.getDetails().get(result.getDetails().size() - 1);
            lastDetail.setHtml(true);
            lastDetail.setContent(lastDetail.getContent() + "\n" + content);
        }
    }
    return result;
}
From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java
/**
 * Links a video to every genre listed in {@code div#video_genres span.genre} of its page,
 * creating categories that do not exist yet.
 *
 * @param doc     parsed video page
 * @param videoId id of the video the genres belong to
 */
private void createVideoCategory(Document doc, int videoId) {
    Date now = new Date();
    Elements categoryElements = doc.select("div#video_genres span.genre");
    if (CollectionUtils.isNotEmpty(categoryElements)) {
        categoryElements.forEach(c -> linkVideoToCategory(c.text(), videoId, now));
    }
}

/**
 * Looks up (or creates) the category named by {@code rawDescription} and inserts the
 * video-to-category row. Blank descriptions are ignored.
 */
private void linkVideoToCategory(String rawDescription, int videoId, Date now) {
    String description = rawDescription.trim();
    if (!StringUtils.isNotBlank(description)) {
        return;
    }

    Category queryCategory = new Category();
    queryCategory.setSubtype(description);
    Category category = categoryMapper.queryByCategory(queryCategory);

    VideoCategory vc = new VideoCategory();
    if (null != category) {
        vc.setCategoryId(category.getId());
        vc.setCategoryDescription(category.getSubtype());
    } else {
        category = new Category();
        category.setSubtype(description);
        category.setCreateTime(now);
        category.setUpdateTime(now);
        try {
            categoryMapper.insertSelective(category);
        } catch (Exception ignored) {
            // Best effort, as in the original: a failed insert is tolerated (presumably the row
            // may already exist -- TODO confirm). The re-query below decides whether to go on.
        }
        Category stored = categoryMapper.queryByCategory(category);
        if (stored == null) {
            // Insert failed and the category is still absent: skip this genre instead of
            // failing with a NullPointerException and aborting the remaining genres.
            return;
        }
        int categoryId = stored.getId();
        vc.setCategoryId(categoryId);
        vc.setCategoryDescription(category.getSubtype());
    }
    vc.setVideoId(videoId);
    vc.setCreateTime(now);
    vc.setUpdateTime(now);
    videoCategoryMapper.insertSelective(vc);
}
From source file:mergedoc.core.APIDocument.java
/** * ? Javadoc ????/*from w ww .j ava 2 s .co m*/ * author, version ? Javadoc ???????????<br> * @param className ?? * @param docHtml API */ private void parseClassComment(String className, Document doc) { Elements elements = doc.select("body > div.contentContainer > div.description > ul > li"); for (Element element : elements) { String sigStr = element.select("pre").first().html(); Signature sig = createSignature(className, sigStr); Comment comment = new Comment(sig); // deprecated String depre = ""; Elements divs = element.select("div"); if (divs.size() == 2) { depre = divs.get(0).html(); } parseDeprecatedTag(className, depre, comment); // if (divs.size() > 0) { String body = divs.last().html(); body = formatLinkTag(className, body); comment.setDocumentBody(body); } // parseCommonTag(className, element, comment); log.debug(sig); contextTable.put(sig, comment); } }
From source file:de.geeksfactory.opacclient.apis.Littera.java
/**
 * Creates one search field per option of the {@code select#adv_search_crit_0} drop-down on the
 * advanced search page and appends them to {@code fields}.
 *
 * <p>Options whose value is listed in {@code SEARCH_FIELDS_FOR_DROPDOWN} become drop-down
 * fields (populated via {@code addDropdownValuesForField}); all others become text fields with
 * an empty hint. The option value serves as field id and is stored in the field's JSON data
 * under the key {@code "meaning"}.
 *
 * @param fields list that receives the created fields
 * @throws IOException   declared by the signature; presumably raised by the HTTP fetch
 * @throws JSONException declared by the signature; presumably raised when filling the JSON data
 */
protected void addAdvancedSearchFields(List<SearchField> fields) throws IOException, JSONException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()));
    final Elements criteria = page.select("select#adv_search_crit_0").first().select("option");

    for (final Element criterion : criteria) {
        final SearchField field;
        if (!SEARCH_FIELDS_FOR_DROPDOWN.contains(criterion.val())) {
            final TextSearchField text = new TextSearchField();
            text.setHint("");
            field = text;
        } else {
            final DropdownSearchField dropdown = new DropdownSearchField();
            addDropdownValuesForField(dropdown, criterion.val());
            field = dropdown;
        }

        field.setDisplayName(criterion.text());
        field.setId(criterion.val());
        field.setData(new JSONObject());
        field.getData().put("meaning", field.getId());
        fields.add(field);
    }
}
From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java
private HttpPost getForm(final HttpContext httpContext, final HttpResponse response, final String user, final String password) throws IllegalStateException, IOException { final HttpEntity entity = response.getEntity(); final HttpHost host = (HttpHost) httpContext.getAttribute(ExecutionContext.HTTP_TARGET_HOST); final String requestUri = getLatestRedirectedUrl(httpContext); final String requestBaseUrl = requestUri.substring(0, requestUri.lastIndexOf('/')); final String userFormId = getHtmlElementId(host, FormId.USER); final String passFormId = getHtmlElementId(host, FormId.PASS); final String loginFormId = getHtmlElementId(host, FormId.LOGIN_FORM); final String loginButton = getSsoErrorHandler(host).getSsoLoginButtonName(); log.debug("Looking for HTML input form retrieved from " + requestUri); final List<NameValuePair> formNvps = new ArrayList<NameValuePair>(); final Document doc = Jsoup.parse(entity.getContent(), "UTF-8", requestBaseUrl); final org.jsoup.nodes.Element form = doc .select("form" + (loginFormId == null ? 
"" : "[id=" + loginFormId + "]")).first(); final String formAction = form.attr("action"); final HttpPost formPost = new HttpPost(getUrl(requestBaseUrl, formAction)); final Elements formFields = form.select("input"); for (final Element element : formFields) { final String fieldName = element.attr("name"); String fieldValue = element.attr("value"); final String fieldId = element.attr("id"); log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", fieldName, fieldValue, fieldId));//from ww w.j a v a 2s .c om if (fieldId.equalsIgnoreCase(userFormId)) { fieldValue = user; log.debug(String.format("Set formField user='%s'", user)); } else if (fieldId.equalsIgnoreCase(passFormId)) { log.debug("Set formField password='*******'"); fieldValue = password; } if (loginButton != null && element.attr("type").equalsIgnoreCase("submit")) { if (element.attr("name").equalsIgnoreCase(loginButton)) { formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } } else { formNvps.add(new BasicNameValuePair(fieldName, fieldValue)); } } formPost.setEntity(new UrlEncodedFormEntity(formNvps, "UTF-8")); return formPost; }
From source file:com.johan.vertretungsplan.parser.UntisCommonParser.java
protected VertretungsplanTag parseMonitorVertretungsplanTag(Document doc, JSONObject data) throws JSONException { VertretungsplanTag tag = new VertretungsplanTag(); tag.setDatum(doc.select(".mon_title").first().text().replaceAll(" \\(Seite \\d+ / \\d+\\)", "")); if (doc.select("table.mon_head td[align=right] p").size() == 0 || schule.getData().optBoolean("stand_links", false)) { tag.setStand(doc.select("body").html().substring(0, doc.select("body").html().indexOf("<p>") - 1)); } else {//from w ww . j av a 2s . c om Element stand = doc.select("table.mon_head td[align=right] p").first(); String info = stand.text(); tag.setStand(info.substring(info.indexOf("Stand:"))); } // NACHRICHTEN if (doc.select("table.info").size() > 0) parseNachrichten(doc.select("table.info").first(), data, tag); // VERTRETUNGSPLAN if (doc.select("table:has(tr.list)").size() > 0) parseVertretungsplanTable(doc.select("table:has(tr.list)").first(), data, tag); return tag; }
From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java
@Override public Object run() { String origBody = contentManager.getDownstreamResponse(); if (origBody == null || origBody.isEmpty()) { return null; }/*from w w w.ja v a 2s .com*/ String composedBody = null; log.trace("Response from downstream server: " + origBody); Document doc = Jsoup.parse(origBody); if (hasReplaceableElements(doc)) { log.debug("We have replaceable elements. Let's get em!"); Elements elementsToUpdate = doc.select("div[data-loc]"); for (Element e : elementsToUpdate) { StringBuilder content = new StringBuilder(); String location = e.dataset().get("loc"); String fragmentName = e.dataset().get("fragment-name"); String cacheName = e.dataset().get("cache-name"); boolean useCaching = !Boolean.valueOf(e.dataset().get("disable-caching")); boolean failQuietly = Boolean.valueOf(e.dataset().get("fail-quietly")); URL url = null; try { url = new URL(location); String protocol = url.getProtocol(); String service = url.getHost(); log.debug("Fetching content at location [ " + location + " ] with cacheName = [ " + cacheName + " ]"); try { RequestContext context = RequestContext.getCurrentContext(); ContentResponse response = contentManager.getContentFromService(location, cacheName, useCaching, context); log.trace(response.toString()); if (!response.isError()) { Object resp = response.getContent(); if (String.class.isAssignableFrom(resp.getClass())) { String subContentResponse = (String) resp; //TODO You better trust the source of your downstream HTML! // String cleanedContent = Jsoup.clean(subContentResponse, Whitelist.basic()); //this totally stripped the html out... 
Document subDocument = Jsoup.parse(subContentResponse); if (fragmentName != null) { Elements fragments = subDocument .select("div[data-fragment-name=\"" + fragmentName + "\"]"); if (fragments != null && fragments.size() > 0) { if (fragments.size() == 1) { Element frag = fragments.first(); //need to see if there are images that we need to replace the urls on Elements images = frag.select("img"); for (Element i : images) { String src = i.attr("src"); if (src.startsWith("/") && !src.startsWith("//")) { i.attr("src", "/cui-req://" + protocol + "://" + service + src); } //else what do we do about relative urls? } content.append(frag.toString()); } else { for (Element frag : fragments) { content.append(frag.toString()).append("\n\n"); } } } else { log.debug("Found no matching fragments for [ " + fragmentName + " ]"); if (failQuietly) { content.append("<div class='cui-error'></div>"); } else { content.append( "<span class='cui-error'>Failed getting content from remote service. Possible reason in reponse below</span>"); content.append(subDocument.toString()); } } } else { //take the whole thing and cram it in there! content.append(subDocument.toString()); } } else { //not text... if (!failQuietly) { content.append( "<span class='cui-error'>Failed getting content from remote service. Reason: content was not text</span>"); } else { content.append("<div class='cui-error'></div>"); } } } else { if (!failQuietly) { content.append( "<span class='cui-error'>Failed getting content from remote service. Reason: " + response.getMessage() + "</span>"); } else { content.append("<div class='cui-error'></div>"); } } //now append it to the page if (!content.toString().isEmpty()) { e.html(content.toString()); } } catch (Throwable t) { if (!failQuietly) { e.html("<span class='cui-error'>Failed getting content from remote service. 
Reason: " + t.getMessage() + "</span>"); } log.warn("Failed replacing content", t); } } catch (MalformedURLException ex) { log.warn("location was invalid: [ " + location + " ]", ex); if (!failQuietly) { content.append( "<span class='cui-error'>Failed getting content from remote service. Reason: data-loc was an invalid location.</span>"); } else { content.append("<div class='cui-error'></div>"); } } } composedBody = doc.toString(); } else { log.debug("Document has no replaeable elements. Skipping"); } try { addResponseHeaders(); if (composedBody != null && !composedBody.isEmpty()) { writeResponse(composedBody, getMimeType(RequestContext.getCurrentContext())); } else { writeResponse(origBody, getMimeType(RequestContext.getCurrentContext())); } } catch (Exception ex) { log.error("Error sending response", ex); } return null; }
From source file:net.devietti.ArchConfMapServlet.java
/** Fetch info for a list of conferences from WikiCFP */ private List<Conf> getConfInfo(List<String> confs) throws IOException { String query = StringUtils.join(confs, "+"); List<Conf> results = new LinkedList<Conf>(); /*/*w w w . j a va 2s .c om*/ * NB: year=f returns hits for this year and future years. This is exactly what we want, since * we automatically discard conferences that have already happened. */ Document doc = getURL("http://www.wikicfp.com/cfp/servlet/tool.search?year=f&q=" + query); Elements rows = doc.select("div[class=contsec] table table tr"); for (Iterator<Element> iter = rows.iterator(); iter.hasNext();) { final Element firstRow = iter.next(); final Elements confName = firstRow.select("td a"); if (confName.isEmpty()) continue; final Conf conf = new Conf(); // make sure we match one of the conferences we're interested in String cn = confName.first().text().split(" ")[0]; int found = Arrays.binarySearch(CONFERENCE_NAMES, cn); if (found < 0) continue; // not found final String confFullName = firstRow.select("td").get(1).text(); // don't match other ICS conferences, eg Information, Communication, Society if (CONFERENCE_NAMES[found].equals("ICS")) { if (!confFullName.toLowerCase().contains("supercomputing")) { continue; } } // don't match other CC conferences, eg Creative Construction if (CONFERENCE_NAMES[found].equals("CC")) { if (!confFullName.toLowerCase().contains("compiler")) { continue; } } conf.name = confName.first().text(); /* * we found a hit! The conference information is split across two <tr> table elements. * Conference name and link to cfp are in the first <tr>, and dates, location and deadline * in the second. 
*/ final Element secondRow = iter.next(); String dates = secondRow.select("td").first().text(); String startDate = dates.substring(0, dates.indexOf('-')).trim(); conf.start = cfpDateFormat.parseDateTime(startDate); conf.end = cfpDateFormat.parseDateTime(dates.substring(dates.indexOf('-') + 1).trim()); conf.dates = cfpDateFormat.print(conf.start) + " - " + cfpDateFormat.print(conf.end); if (conf.start.year().equals(conf.end.year()) && conf.start.monthOfYear().equals(conf.end.monthOfYear())) { conf.dates = monthFormat.print(conf.start) + " " + dayFormat.print(conf.start) + "-" + dayFormat.print(conf.end) + " " + yearFormat.print(conf.start); } String deadline = secondRow.select("td").get(2).text().trim(); if (deadline.contains("(")) { // abstract deadline may be in parentheses deadline = deadline.substring(0, deadline.indexOf('(')).trim(); } conf.deadline = cfpDateFormat.parseDateTime(deadline); conf.url = "http://www.wikicfp.com" + confName.attr("href"); /* * extract the WikiCFP eventid from the link, so that, later on, the client can pull the * cfp page and get the direct conference site link. */ com.shopobot.util.URL url = new com.shopobot.util.URL(conf.url); String[] eid = url.getParameters("eventid"); if (0 == eid.length) continue; try { conf.eventid = Integer.valueOf(eid[0]); } catch (NumberFormatException e) { error("invalid event id " + eid); continue; } conf.location = secondRow.select("td").get(1).text(); results.add(conf); } return results; }