Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Builds the free-text search field from the simple-search page and appends it to {@code fields}.
 *
 * <p>The page is fetched from {@code getApiUrl() + "&mode=s"}. The field's display name is the text
 * of the first {@code h4} inside the first {@code .simple_search} element, and its id is the
 * {@code name} attribute of the first {@code #keyboard} element inside it.
 *
 * @param fields list that receives the newly created field
 * @throws IOException declared by the method; {@code httpGet} is the only I/O call visible here
 * @throws JSONException declared by the method; the only JSON write here is the "meaning" entry
 */
protected void addSimpleSearchField(List<SearchField> fields) throws IOException, JSONException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=s", getDefaultEncoding()));
    final Element searchBox = page.select(".simple_search").first();

    final TextSearchField freeText = new TextSearchField();
    freeText.setFreeSearch(true);

    // Label comes from the heading of the search box.
    final Element heading = searchBox.select("h4").first();
    freeText.setDisplayName(heading.text());

    // The form input's name doubles as the field id.
    final Element input = searchBox.select("#keyboard").first();
    freeText.setId(input.attr("name"));

    freeText.setHint("");
    freeText.setData(new JSONObject());
    freeText.getData().put("meaning", freeText.getId());
    fields.add(freeText);
}

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Determines which languages the catalogue offers by scanning the main menu of the
 * advanced-search page ({@code "&mode=a"}).
 *
 * <p>A language from {@code LANGUAGE_CODES} counts as supported when the menu HTML contains
 * either {@code lang=<code>} (language switch link) or {@code /<code>/} (help link).
 *
 * @return the keys of {@code LANGUAGE_CODES} whose code appears in the menu HTML
 * @throws IOException declared by the method; {@code httpGet} is the only I/O call visible here
 */
@Override
public Set<String> getSupportedLanguages() throws IOException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()));
    final String menu = page.select(".mainmenu").first().html();

    final Set<String> supported = new HashSet<>();
    for (final Map.Entry<String, String> language : LANGUAGE_CODES.entrySet()) {
        final String code = language.getValue();
        final boolean hasSwitchLink = menu.contains("lang=" + code);
        if (hasSwitchLink || menu.contains("/" + code + "/")) {
            supported.add(language.getKey());
        }
    }
    return supported;
}

From source file:de.geeksfactory.opacclient.apis.WebOpacAt.java

/**
 * Loads the detail page for one item and converts it into a {@link DetailledItem}.
 *
 * <p>The page is fetched from {@code getApiUrl() + "&view=detail&id=" + id}. Title, cover and
 * media type are read from the {@code .detailData} section; a single {@link Copy} is filled
 * from the first table inside {@code .bibliothek}. Every row of {@code table.titel} becomes a
 * {@link Detail}; a row with an empty first cell is treated as a continuation of the previous
 * detail and its text is appended on a new line.
 *
 * @param id identifier of the item, appended to the request URL as-is
 * @param homebranch not used by this implementation
 * @return the populated item
 * @throws IOException declared by the method; {@code httpGet} is the only I/O call visible here
 * @throws OpacErrorException declared by the method; not thrown directly in this body
 */
@Override
public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException {
    if (!initialised) {
        start();
    }
    final String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);
    final Element detailData = doc.select(".detailData").first();
    final Element detailTable = detailData.select("table.titel").first();
    final Element availabilityTable = doc.select(".bibliothek table").first();

    final DetailledItem result = new DetailledItem();
    final Copy copy = new Copy();
    result.addCopy(copy);
    result.setId(id);
    result.setCover(getCover(doc));
    result.setTitle(detailData.select("h3").first().text());
    result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media")));
    // The German label is "Verfügbar"; written with a Unicode escape so that it survives
    // source-encoding mix-ups (the umlaut had been lost, so the label could never match).
    copy.setStatus(getCellContent(availabilityTable, "Verf\u00fcgbar|Available"));
    copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent")));
    copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations"));

    for (final Element tr : detailTable.select("tr")) {
        // Rows without a label/value pair cannot be mapped; skip them instead of failing
        // with an IndexOutOfBoundsException on child(1).
        if (tr.children().size() < 2) {
            continue;
        }
        final String desc = tr.child(0).text();
        final String content = tr.child(1).text();
        if (!desc.trim().isEmpty()) {
            result.addDetail(new Detail(desc, content));
        } else if (!result.getDetails().isEmpty()) {
            // Empty label: continuation line of the previous detail.
            final Detail lastDetail = result.getDetails().get(result.getDetails().size() - 1);
            lastDetail.setHtml(true);
            lastDetail.setContent(lastDetail.getContent() + "\n" + content);
        }
    }
    return result;
}

From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java

/**
 * Links a video to every genre listed in the document.
 *
 * <p>Each non-blank {@code div#video_genres span.genre} text is treated as a category subtype.
 * An existing category is reused; otherwise a new one is inserted first. In both cases a
 * {@code VideoCategory} row connecting the category and {@code videoId} is inserted.
 *
 * @param doc parsed page containing the genre list
 * @param videoId id of the video the categories belong to
 * @throws IllegalStateException if a new category can neither be inserted nor found afterwards
 */
private void createVideoCategory(Document doc, int videoId) {
    Date now = new Date();

    // select() returns an empty (never null) result when nothing matches, so no extra
    // emptiness check is needed before iterating.
    doc.select("div#video_genres span.genre").forEach(genre -> {
        String description = genre.text().trim();
        if (StringUtils.isNotBlank(description)) {
            linkVideoToCategory(description, videoId, now);
        }
    });
}

/** Looks up (or creates) the category named {@code description} and links it to the video. */
private void linkVideoToCategory(String description, int videoId, Date now) {
    Category probe = new Category();
    probe.setSubtype(description);
    Category category = categoryMapper.queryByCategory(probe);

    VideoCategory vc = new VideoCategory();
    if (null != category) {
        vc.setCategoryId(category.getId());
        vc.setCategoryDescription(category.getSubtype());
    } else {
        category = new Category();
        category.setSubtype(description);
        category.setCreateTime(now);
        category.setUpdateTime(now);

        Exception insertFailure = null;
        try {
            categoryMapper.insertSelective(category);
        } catch (Exception e) {
            // Best effort: the lookup below decides whether the category is usable.
            // NOTE(review): the failure was previously ignored, presumably to tolerate the
            // same category being inserted elsewhere in the meantime — confirm.
            insertFailure = e;
        }

        Category stored = categoryMapper.queryByCategory(category);
        if (null == stored) {
            // Previously this surfaced as a bare NullPointerException with the cause lost.
            throw new IllegalStateException(
                    "Category '" + description + "' could not be inserted or found", insertFailure);
        }
        int categoryId = stored.getId();
        vc.setCategoryId(categoryId);
        vc.setCategoryDescription(category.getSubtype());
    }

    vc.setVideoId(videoId);
    vc.setCreateTime(now);
    vc.setUpdateTime(now);
    videoCategoryMapper.insertSelective(vc);
}

From source file:mergedoc.core.APIDocument.java

/**
 * ? Javadoc ????/*from w ww  .j ava  2 s  .co  m*/
 * author, version ? Javadoc ???????????<br>
 * @param className ??
 * @param docHtml API 
 */
private void parseClassComment(String className, Document doc) {
    Elements elements = doc.select("body > div.contentContainer > div.description > ul > li");
    for (Element element : elements) {
        String sigStr = element.select("pre").first().html();
        Signature sig = createSignature(className, sigStr);
        Comment comment = new Comment(sig);

        // deprecated 
        String depre = "";
        Elements divs = element.select("div");
        if (divs.size() == 2) {
            depre = divs.get(0).html();
        }
        parseDeprecatedTag(className, depre, comment);

        // 
        if (divs.size() > 0) {
            String body = divs.last().html();
            body = formatLinkTag(className, body);
            comment.setDocumentBody(body);
        }

        // 
        parseCommonTag(className, element, comment);

        log.debug(sig);
        contextTable.put(sig, comment);
    }
}

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Creates one search field per option of the advanced-search criterion selector and appends
 * them to {@code fields}.
 *
 * <p>The options are read from the first {@code select#adv_search_crit_0} on the
 * {@code "&mode=a"} page. Options whose value is listed in {@code SEARCH_FIELDS_FOR_DROPDOWN}
 * become dropdown fields (filled via {@code addDropdownValuesForField}); all others become
 * text fields with an empty hint.
 *
 * @param fields list that receives the newly created fields
 * @throws IOException declared by the method; {@code httpGet} is the only I/O call visible here
 * @throws JSONException declared by the method; the only JSON write here is the "meaning" entry
 */
protected void addAdvancedSearchFields(List<SearchField> fields) throws IOException, JSONException {
    final Document page = Jsoup.parse(httpGet(getApiUrl() + "&mode=a", getDefaultEncoding()));
    final Element criteria = page.select("select#adv_search_crit_0").first();

    for (final Element option : criteria.select("option")) {
        final String key = option.val();
        final SearchField field;
        if (SEARCH_FIELDS_FOR_DROPDOWN.contains(key)) {
            final DropdownSearchField dropdown = new DropdownSearchField();
            addDropdownValuesForField(dropdown, key);
            field = dropdown;
        } else {
            final TextSearchField text = new TextSearchField();
            text.setHint("");
            field = text;
        }

        // Common setup: the option's label and value become the display name and id.
        field.setDisplayName(option.text());
        field.setId(key);
        field.setData(new JSONObject());
        field.getData().put("meaning", field.getId());
        fields.add(field);
    }
}

From source file:mobi.jenkinsci.ci.client.JenkinsFormAuthHttpClient.java

/**
 * Builds the POST request that submits the login form found in {@code response}.
 *
 * <p>The response body is parsed as HTML, the login form is located (by id when one is
 * configured for the host, otherwise the first {@code form}), and all of its {@code input}
 * fields are copied into the request. The inputs whose id matches the configured user or
 * password element id receive {@code user} / {@code password} as value. When a login button
 * name is configured, submit inputs with a different name are left out.
 *
 * @param httpContext context of the request that returned the form; supplies the target host
 *        and the latest redirected URL
 * @param response response whose entity contains the HTML form
 * @param user user name to place into the user field
 * @param password password to place into the password field
 * @return the POST request targeting the form's action, with a UTF-8 URL-encoded form entity
 * @throws IllegalStateException declared by the method; not thrown directly in this body
 * @throws IOException declared by the method; not thrown directly in this body
 */
private HttpPost getForm(final HttpContext httpContext, final HttpResponse response, final String user,
        final String password) throws IllegalStateException, IOException {
    final HttpEntity body = response.getEntity();
    final HttpHost targetHost = (HttpHost) httpContext.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
    final String pageUri = getLatestRedirectedUrl(httpContext);
    final String baseUrl = pageUri.substring(0, pageUri.lastIndexOf('/'));
    final String userInputId = getHtmlElementId(targetHost, FormId.USER);
    final String passInputId = getHtmlElementId(targetHost, FormId.PASS);
    final String formId = getHtmlElementId(targetHost, FormId.LOGIN_FORM);
    final String submitName = getSsoErrorHandler(targetHost).getSsoLoginButtonName();

    log.debug("Looking for HTML input form retrieved from " + pageUri);

    final List<NameValuePair> params = new ArrayList<NameValuePair>();

    final Document page = Jsoup.parse(body.getContent(), "UTF-8", baseUrl);
    final String formSelector = (formId == null) ? "form" : "form" + "[id=" + formId + "]";
    final org.jsoup.nodes.Element loginForm = page.select(formSelector).first();
    final HttpPost post = new HttpPost(getUrl(baseUrl, loginForm.attr("action")));

    for (final Element input : loginForm.select("input")) {
        final String name = input.attr("name");
        final String id = input.attr("id");
        String value = input.attr("value");

        log.debug(String.format("Processing form field: name='%s' value='%s' id='%s'", name, value,
                id));

        // Credentials replace whatever value the page pre-filled.
        if (id.equalsIgnoreCase(userInputId)) {
            value = user;
            log.debug(String.format("Set formField user='%s'", user));
        } else if (id.equalsIgnoreCase(passInputId)) {
            log.debug("Set formField password='*******'");
            value = password;
        }

        // With a configured login button, only that submit input is sent; other submit
        // inputs are dropped. Non-submit inputs are always sent.
        final boolean otherSubmitButton = submitName != null
                && input.attr("type").equalsIgnoreCase("submit")
                && !input.attr("name").equalsIgnoreCase(submitName);
        if (!otherSubmitButton) {
            params.add(new BasicNameValuePair(name, value));
        }
    }

    post.setEntity(new UrlEncodedFormEntity(params, "UTF-8"));
    return post;
}

From source file:com.johan.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses one day of a monitor-style substitution plan page.
 *
 * <p>The date is the text of the first {@code .mon_title} with a trailing
 * {@code " (Seite x / y)"} page marker removed. The "Stand" value is read either from the
 * right-aligned header cell ({@code table.mon_head td[align=right] p}, starting at
 * {@code "Stand:"}) or — when that cell is missing or the school's {@code stand_links} flag is
 * set — from the body HTML preceding the first {@code <p>}. Afterwards the first
 * {@code table.info} and the first table containing a {@code tr.list} are handed to
 * {@code parseNachrichten} and {@code parseVertretungsplanTable}, when present.
 *
 * @param doc parsed plan page
 * @param data passed through to the table parsers
 * @return the populated day
 * @throws JSONException declared by the method; not thrown directly in this body
 */
protected VertretungsplanTag parseMonitorVertretungsplanTag(Document doc, JSONObject data)
        throws JSONException {
    VertretungsplanTag tag = new VertretungsplanTag();
    tag.setDatum(doc.select(".mon_title").first().text().replaceAll(" \\(Seite \\d+ / \\d+\\)", ""));

    // first() yields null when nothing matches, which replaces the former size() == 0 check
    // and avoids running the same selector twice.
    Element standRight = doc.select("table.mon_head td[align=right] p").first();
    if (standRight == null || schule.getData().optBoolean("stand_links", false)) {
        String bodyHtml = doc.select("body").html();
        int firstParagraph = bodyHtml.indexOf("<p>");
        if (firstParagraph < 0) {
            // No paragraph to cut at: the value cannot be located. Previously this threw a
            // StringIndexOutOfBoundsException.
            tag.setStand("");
        } else {
            // Drop the single character directly in front of "<p>", as before; the max()
            // guards the case where the body starts with "<p>".
            tag.setStand(bodyHtml.substring(0, Math.max(firstParagraph - 1, 0)));
        }
    } else {
        String info = standRight.text();
        int standIndex = info.indexOf("Stand:");
        // Without a "Stand:" marker keep the whole text instead of failing on substring(-1).
        tag.setStand(standIndex < 0 ? info : info.substring(standIndex));
    }

    // Messages
    Element infoTable = doc.select("table.info").first();
    if (infoTable != null) {
        parseNachrichten(infoTable, data, tag);
    }

    // Substitution plan
    Element planTable = doc.select("table:has(tr.list)").first();
    if (planTable != null) {
        parseVertretungsplanTable(planTable, data, tag);
    }

    return tag;
}

From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java

/**
 * Composes the downstream response by filling every {@code div[data-loc]} placeholder with
 * content fetched from the location it names, then writes the resulting page.
 *
 * <p>Per placeholder the following {@code data-*} attributes are read: {@code loc} (URL of the
 * content), {@code fragment-name} (optional; only {@code div[data-fragment-name=...]} elements
 * of the fetched document are used), {@code cache-name}, {@code disable-caching} and
 * {@code fail-quietly} (emit an empty {@code cui-error} div instead of a visible message).
 * Root-relative image URLs inside a single matching fragment are rewritten to
 * {@code /cui-req://<protocol>://<host><src>}.
 *
 * <p>If the downstream body is empty nothing is written. If the document has no replaceable
 * elements, or composing yields an empty body, the original body is written unchanged.
 *
 * <p>NOTE(review): fetched HTML and exception/response messages are inserted into the page
 * without escaping — this relies on the downstream services being trusted; confirm.
 *
 * @return always {@code null}
 */
@Override
public Object run() {

    String origBody = contentManager.getDownstreamResponse();
    if (origBody == null || origBody.isEmpty()) {
        return null;
    }

    String composedBody = null;
    log.trace("Response from downstream server: " + origBody);

    Document doc = Jsoup.parse(origBody);
    if (hasReplaceableElements(doc)) {
        log.debug("We have replaceable elements. Let's get em!");
        Elements elementsToUpdate = doc.select("div[data-loc]");
        for (Element e : elementsToUpdate) {
            StringBuilder content = new StringBuilder();
            String location = e.dataset().get("loc");
            String fragmentName = e.dataset().get("fragment-name");
            String cacheName = e.dataset().get("cache-name");
            // parseBoolean treats a missing attribute (null) as false, like valueOf did,
            // but without boxing.
            boolean useCaching = !Boolean.parseBoolean(e.dataset().get("disable-caching"));
            boolean failQuietly = Boolean.parseBoolean(e.dataset().get("fail-quietly"));
            try {
                URL url = new URL(location);
                String protocol = url.getProtocol();
                String service = url.getHost();

                log.debug("Fetching content at location [ " + location + " ] with cacheName = [ " + cacheName
                        + " ]");

                try {
                    RequestContext context = RequestContext.getCurrentContext();
                    ContentResponse response = contentManager.getContentFromService(location, cacheName,
                            useCaching, context);

                    log.trace(response.toString());

                    if (!response.isError()) {
                        Object resp = response.getContent();
                        // instanceof also routes a null content into the "not text" branch
                        // instead of failing with a NullPointerException on getClass().
                        if (resp instanceof String) {
                            String subContentResponse = (String) resp;
                            //TODO You better trust the source of your downstream HTML!
                            Document subDocument = Jsoup.parse(subContentResponse);

                            if (fragmentName != null) {
                                Elements fragments = subDocument
                                        .select("div[data-fragment-name=\"" + fragmentName + "\"]");

                                if (fragments != null && fragments.size() > 0) {
                                    if (fragments.size() == 1) {
                                        Element frag = fragments.first();

                                        //need to see if there are images that we need to replace the urls on
                                        Elements images = frag.select("img");
                                        for (Element i : images) {
                                            String src = i.attr("src");
                                            // root-relative only; protocol-relative ("//host/...") is left alone
                                            if (src.startsWith("/") && !src.startsWith("//")) {
                                                i.attr("src", "/cui-req://" + protocol + "://" + service + src);
                                            } //else what do we do about relative urls?
                                        }

                                        content.append(frag.toString());

                                    } else {
                                        // several matches: concatenated as-is, without image rewriting
                                        for (Element frag : fragments) {
                                            content.append(frag.toString()).append("\n\n");
                                        }
                                    }
                                } else {
                                    log.debug("Found no matching fragments for [ " + fragmentName + " ]");
                                    if (failQuietly) {
                                        content.append("<div class='cui-error'></div>");
                                    } else {
                                        content.append(
                                                "<span class='cui-error'>Failed getting content from remote service. Possible reason in reponse below</span>");
                                        content.append(subDocument.toString());
                                    }
                                }
                            } else {
                                //take the whole thing and cram it in there!
                                content.append(subDocument.toString());
                            }
                        } else {
                            //not text...
                            if (!failQuietly) {
                                content.append(
                                        "<span class='cui-error'>Failed getting content from remote service. Reason: content was not text</span>");
                            } else {
                                content.append("<div class='cui-error'></div>");
                            }
                        }

                    } else {
                        if (!failQuietly) {
                            content.append(
                                    "<span class='cui-error'>Failed getting content from remote service. Reason: "
                                            + response.getMessage() + "</span>");
                        } else {
                            content.append("<div class='cui-error'></div>");
                        }
                    }

                    //now append it to the page
                    if (!content.toString().isEmpty()) {
                        e.html(content.toString());
                    }
                } catch (Throwable t) {
                    // Deliberately broad: one failing placeholder must not break the whole page.
                    if (!failQuietly) {
                        e.html("<span class='cui-error'>Failed getting content from remote service. Reason: "
                                + t.getMessage() + "</span>");
                    }
                    log.warn("Failed replacing content", t);
                }
            } catch (MalformedURLException ex) {
                log.warn("location was invalid: [ " + location + " ]", ex);
                if (!failQuietly) {
                    content.append(
                            "<span class='cui-error'>Failed getting content from remote service. Reason: data-loc was an invalid location.</span>");
                } else {
                    content.append("<div class='cui-error'></div>");
                }
                // The error markup was previously built but never written to the element,
                // so an invalid data-loc left the placeholder untouched.
                e.html(content.toString());
            }

        }

        composedBody = doc.toString();
    } else {
        log.debug("Document has no replaeable elements. Skipping");
    }

    try {
        addResponseHeaders();
        if (composedBody != null && !composedBody.isEmpty()) {
            writeResponse(composedBody, getMimeType(RequestContext.getCurrentContext()));
        } else {
            writeResponse(origBody, getMimeType(RequestContext.getCurrentContext()));
        }
    } catch (Exception ex) {
        log.error("Error sending response", ex);

    }
    return null;
}

From source file:net.devietti.ArchConfMapServlet.java

/** Fetch info for a list of conferences from WikiCFP */
private List<Conf> getConfInfo(List<String> confs) throws IOException {
    String query = StringUtils.join(confs, "+");
    List<Conf> results = new LinkedList<Conf>();

    /*/*w  w  w  .  j a va 2s .c  om*/
     * NB: year=f returns hits for this year and future years. This is exactly what we want, since
     * we automatically discard conferences that have already happened.
     */
    Document doc = getURL("http://www.wikicfp.com/cfp/servlet/tool.search?year=f&q=" + query);

    Elements rows = doc.select("div[class=contsec] table table tr");
    for (Iterator<Element> iter = rows.iterator(); iter.hasNext();) {
        final Element firstRow = iter.next();
        final Elements confName = firstRow.select("td a");
        if (confName.isEmpty())
            continue;

        final Conf conf = new Conf();

        // make sure we match one of the conferences we're interested in
        String cn = confName.first().text().split(" ")[0];
        int found = Arrays.binarySearch(CONFERENCE_NAMES, cn);
        if (found < 0)
            continue; // not found

        final String confFullName = firstRow.select("td").get(1).text();
        // don't match other ICS conferences, eg Information, Communication, Society
        if (CONFERENCE_NAMES[found].equals("ICS")) {
            if (!confFullName.toLowerCase().contains("supercomputing")) {
                continue;
            }
        }
        // don't match other CC conferences, eg Creative Construction
        if (CONFERENCE_NAMES[found].equals("CC")) {
            if (!confFullName.toLowerCase().contains("compiler")) {
                continue;
            }
        }

        conf.name = confName.first().text();

        /*
         * we found a hit! The conference information is split across two <tr> table elements.
         * Conference name and link to cfp are in the first <tr>, and dates, location and deadline
         * in the second.
         */

        final Element secondRow = iter.next();
        String dates = secondRow.select("td").first().text();
        String startDate = dates.substring(0, dates.indexOf('-')).trim();
        conf.start = cfpDateFormat.parseDateTime(startDate);
        conf.end = cfpDateFormat.parseDateTime(dates.substring(dates.indexOf('-') + 1).trim());

        conf.dates = cfpDateFormat.print(conf.start) + " - " + cfpDateFormat.print(conf.end);
        if (conf.start.year().equals(conf.end.year())
                && conf.start.monthOfYear().equals(conf.end.monthOfYear())) {
            conf.dates = monthFormat.print(conf.start) + " " + dayFormat.print(conf.start) + "-"
                    + dayFormat.print(conf.end) + " " + yearFormat.print(conf.start);
        }

        String deadline = secondRow.select("td").get(2).text().trim();
        if (deadline.contains("(")) { // abstract deadline may be in parentheses
            deadline = deadline.substring(0, deadline.indexOf('(')).trim();
        }
        conf.deadline = cfpDateFormat.parseDateTime(deadline);

        conf.url = "http://www.wikicfp.com" + confName.attr("href");
        /*
         * extract the WikiCFP eventid from the link, so that, later on, the client can pull the
         * cfp page and get the direct conference site link.
         */

        com.shopobot.util.URL url = new com.shopobot.util.URL(conf.url);
        String[] eid = url.getParameters("eventid");
        if (0 == eid.length)
            continue;
        try {
            conf.eventid = Integer.valueOf(eid[0]);
        } catch (NumberFormatException e) {
            error("invalid event id " + eid);
            continue;
        }

        conf.location = secondRow.select("td").get(1).text();

        results.add(conf);
    }
    return results;
}