Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element.select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:de.ncoder.studipsync.studip.jsoup.JsoupStudipAdapter.java

/**
 * Loads the seminar overview page and builds a {@link Seminar} for every qualifying table row.
 *
 * <p>A row qualifies when it has more than four direct {@code td} cells, its fourth cell contains a
 * leading link, and that link contains at least two {@code font} elements. The link's absolute
 * {@code href} and the trimmed texts of the first two {@code font} elements are handed to
 * {@code Seminar.getSeminar}.
 *
 * @return the seminars found on the page, in document order
 * @throws StudipException declared by this method; thrown by the login/navigation helpers it calls
 */
@Override
public List<Seminar> parseSeminars() throws StudipException {
    ensureLoggedIn();
    navigate(PAGE_SEMINARS);

    List<Seminar> seminars = new ArrayList<>();
    for (org.jsoup.nodes.Element row : document.select("#content>table:first-of-type>tbody>tr")) {
        // Rows with four or fewer cells are not seminar rows.
        if (row.select(">td").size() <= 4) {
            continue;
        }

        Elements link = row.select(">td:nth-of-type(4)>a:first-of-type");
        Elements fonts = link.select("font");
        if (link.isEmpty() || fonts.size() < 2) {
            continue;
        }

        String url = link.get(0).absUrl("href");
        String firstText = fonts.get(0).text().trim();
        String secondText = fonts.get(1).text().trim();
        seminars.add(Seminar.getSeminar(url, firstText, secondText));
    }

    log.debug("Parsed " + seminars.size() + " seminars.");
    log.trace(seminars.toString());
    return seminars;
}

From source file:me.vertretungsplan.parser.SVPlanParser.java

/**
 * Parses one day of an SVPlan substitution schedule from {@code svp} and adds it to {@code v}.
 *
 * <p>The element is accepted when it contains one of the known date/title elements or when the
 * document title starts with "Vertretungsplan für ". Substitution rows are read from
 * {@code .svp-tabelle} or from a table containing a {@code .Klasse} element; each {@code td} is
 * mapped to a substitution field by the prefix of its class attribute. Empty lesson/class cells
 * inherit the value of the most recent non-empty cell of the same kind. Afterwards the planned
 * teachers, absences and general messages are appended to the day as messages.
 *
 * @param v   schedule that receives the parsed day
 * @param svp element containing the plan for one day
 * @param doc document the element belongs to (used for the title check and header detection)
 * @throws IOException if neither a date/title element nor the expected page title is present
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    boolean hasDateElement = !svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").isEmpty();
    // The umlaut is written as a Unicode escape so the literal does not depend on the source encoding.
    if (!hasDateElement && !doc.title().startsWith("Vertretungsplan f\u00fcr ")) {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }

    SubstitutionScheduleDay day = new SubstitutionScheduleDay();
    setDate(svp, doc, day);

    if (!svp.select(".svp-tabelle, table:has(.Klasse)").isEmpty()) {
        // These lookups do not change while iterating, so they are evaluated once.
        String classSeparator = data.optString(PARAM_CLASS_SEPARATOR, ", ");
        boolean excludeTeachers = data.optBoolean(PARAM_EXCLUDE_TEACHERS);
        boolean docUsesHeaderClass = !doc.select(".svp-header").isEmpty();

        // Last non-empty lesson/class seen; reused for rows that leave those cells blank.
        String lastLesson = "";
        String lastClass = "";

        for (Element row : svp.select(".svp-tabelle tr, table:has(.Klasse) tr")) {
            boolean skipRow = (docUsesHeaderClass && row.hasClass("svp-header"))
                    || !row.select("th").isEmpty()
                    || row.text().trim().equals("");
            if (skipRow) {
                continue;
            }

            Substitution substitution = new Substitution();

            for (Element column : row.select("td")) {
                String type = column.className();
                String text = column.text();
                boolean isLesson = type.startsWith("svp-stunde") || type.startsWith("Stunde");
                boolean isClass = type.startsWith("svp-klasse") || type.startsWith("Klasse");

                if (!hasData(text)) {
                    // Blank cell: fall back to the value carried over from an earlier row.
                    if (isLesson && hasData(lastLesson)) {
                        substitution.setLesson(lastLesson);
                    } else if (isClass && hasData(lastClass)) {
                        substitution.getClasses().addAll(Arrays.asList(lastClass.split(classSeparator)));
                    }
                    continue;
                }

                if (isLesson) {
                    substitution.setLesson(text);
                    lastLesson = text;
                } else if (isClass) {
                    substitution.getClasses().addAll(Arrays.asList(text.split(classSeparator)));
                    lastClass = text;
                } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                    if (!excludeTeachers) {
                        substitution.setPreviousTeacher(text);
                    }
                } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                    if (!excludeTeachers) {
                        // Drop a trailing " +" marker from the teacher name.
                        substitution.setTeacher(text.replaceAll(" \\+$", ""));
                    }
                } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                    substitution.setSubject(text);
                } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                    substitution.setDesc(text);
                    String recognizedType = recognizeType(text);
                    substitution.setType(recognizedType);
                    substitution.setColor(colorProvider.getColor(recognizedType));
                } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                    substitution.setRoom(text);
                }
            }

            // Default type when no remark column produced one.
            if (substitution.getType() == null) {
                substitution.setType("Vertretung");
                substitution.setColor(colorProvider.getColor("Vertretung"));
            }

            day.addSubstitution(substitution);
        }
    }

    Elements plannedTeachers = svp.select(".LehrerVerplant");
    if (!plannedTeachers.isEmpty()) {
        day.addMessage("<b>Verplante Lehrer:</b> " + plannedTeachers.text());
    }
    Elements absences = svp.select(".Abwesenheiten");
    if (!absences.isEmpty()) {
        day.addMessage("<b>Abwesenheiten:</b> " + absences.text());
    }

    Elements messageHeadings = svp.select("h2:contains(Mitteilungen)");
    if (!messageHeadings.isEmpty()) {
        // Messages are the run of <p> siblings directly following the heading.
        Element sibling = messageHeadings.first().nextElementSibling();
        while (sibling != null && sibling.tagName().equals("p")) {
            for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText()
                    .split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
            sibling = sibling.nextElementSibling();
        }
    } else {
        for (Element p : svp.select(".Mitteilungen")) {
            for (String nachricht : TextNode.createFromEncoded(p.html(), null).getWholeText()
                    .split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
        }
    }

    v.addDay(day);
}

From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java

/**
 * Extracts the HTML content of the second {@code td} cell of a table row.
 *
 * <p>If that cell has child elements, the inner HTML of its first child is returned unchanged;
 * otherwise the cell's own inner HTML is returned, trimmed.
 *
 * @param eleTr the table row to read
 * @return the HTML of the second cell's first child, or the trimmed HTML of the cell itself
 * @throws Exception declared by the signature; note that {@code get(1)} fails when the row has
 *         fewer than two {@code td} elements
 */
private String parseDetailTr(Element eleTr) throws Exception {
    Element cell = eleTr.select("td").get(1);
    return cell.children().isEmpty() ? cell.html().trim() : cell.child(0).html();
}

From source file:org.confab.PhpBB3Parser.java

/**
 * Extracts the forums listed on a phpBB3 index page.
 *
 * <p>Every {@code li.row} inside a {@code ul} whose class attribute is exactly
 * {@code "topiclist forums"} yields one {@link Forum}. The first {@code a.forumtitle} link in the
 * row provides the forum's URL and title; any further {@code a.forumtitle} links in the same row
 * are added as sub-forums. The description is the text of the title link's parent element.
 *
 * @param root   parsed index page
 * @param parent bulletin board passed to the constructor of every created forum and sub-forum
 * @return the forums found, in document order
 */
public List<Forum> parseForums(Document root, BulletinBoard parent) {
    Utilities.debug("parseForums");

    List<Forum> ret = new ArrayList<Forum>();

    Elements forumTables = root.select("ul[class=topiclist forums]");
    assert !forumTables.isEmpty() : root.html();

    for (Element forumTable : forumTables) {
        Elements rows = forumTable.select("li.row");
        assert !rows.isEmpty();
        for (Element row : rows) {
            Forum newForum = new Forum(parent);

            // The first title link describes the forum itself.
            Elements titleLinks = row.select("a.forumtitle");
            Element titleLink = titleLinks.first();
            assert titleLink != null;
            newForum.url = titleLink.attr("href");
            assert newForum.url != null;
            Utilities.debug("new_forum.url : " + newForum.url);

            newForum.title = titleLink.text();
            assert newForum.title != null;
            Utilities.debug("new_forum.title : " + newForum.title);

            // Remaining links are sub-forums. Iterate from index 1 instead of removing the first
            // entry: the selection stays untouched (recent jsoup versions also detach removed
            // elements from the DOM, which would break the parent() lookup below).
            for (int i = 1; i < titleLinks.size(); i++) {
                Element subLink = titleLinks.get(i);
                Forum subForum = new Forum(parent);
                // Read from the sub-forum's own link, not from the main title link.
                subForum.url = subLink.attr("href");
                assert subForum.url != null;
                subForum.title = subLink.text();
                assert subForum.title != null;
                newForum.subForums.add(subForum);
                Utilities.debug("added subForum: " + subForum.title);
            }

            // Description: full text of the element that contains the title link.
            String description = titleLink.parent().text();
            newForum.description = (description != null) ? description : "";
            Utilities.debug("new_forum.description : " + newForum.description);

            Utilities.debug("new_forum.parent.url : " + newForum.parent.url);

            ret.add(newForum);
            Utilities.debug("-----");
        }
    }
    Utilities.debug("end parseForums");
    return ret;
}

From source file:mergedoc.core.APIDocument.java

/**
 * ? Javadoc ????/*from  w  w w .  j  a v a  2  s .  co m*/
 * author, version ? Javadoc ???????????<br>
 * @param className ??
 * @param docHtml API 
 */
private void parseClassComment(String className, Document doc) {
    Elements elements = doc.select("body > div.contentContainer > div.description > ul > li");
    for (Element element : elements) {
        String sigStr = element.select("pre").first().html();
        Signature sig = createSignature(className, sigStr);
        Comment comment = new Comment(sig);

        // deprecated 
        String depre = "";
        Elements divs = element.select("div");
        if (divs.size() == 2) {
            depre = divs.get(0).html();
        }
        parseDeprecatedTag(className, depre, comment);

        // 
        if (divs.size() > 0) {
            String body = divs.last().html();
            body = formatLinkTag(className, body);
            comment.setDocumentBody(body);
        }

        // 
        parseCommonTag(className, element, comment);

        log.debug(sig);
        contextTable.put(sig, comment);
    }
}

From source file:de.geeksfactory.opacclient.apis.WebOpacAt.java

/**
 * Fetches the detail page for one item and converts it into a {@link DetailledItem}.
 *
 * <p>Title, cover and media type are read from the {@code .detailData} block; status, return date
 * and reservations for the single copy are read from the table inside {@code .bibliothek}. Every
 * row of the {@code table.titel} table becomes a detail; a row whose first cell is blank is
 * treated as a continuation and appended to the previous detail on a new line.
 *
 * @param id         item id appended to the detail URL
 * @param homebranch not used by this implementation
 * @return the populated item with exactly one copy
 * @throws IOException        declared by this method; the page is fetched via {@code httpGet}
 * @throws OpacErrorException declared by this method
 */
@Override
public DetailledItem getResultById(String id, String homebranch) throws IOException, OpacErrorException {
    if (!initialised) {
        start();
    }
    final String html = httpGet(getApiUrl() + "&view=detail&id=" + id, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);
    final Element detailData = doc.select(".detailData").first();
    final Element detailTable = detailData.select("table.titel").first();
    final Element availabilityTable = doc.select(".bibliothek table").first();

    final DetailledItem result = new DetailledItem();
    final Copy copy = new Copy();
    result.addCopy(copy);
    result.setId(id);
    result.setCover(getCover(doc));
    result.setTitle(detailData.select("h3").first().text());
    result.setMediaType(MEDIA_TYPES.get(getCellContent(detailTable, "Medienart|Type of media")));
    // German label "Verfügbar"; the umlaut is escaped so the literal does not depend on the
    // source encoding.
    copy.setStatus(getCellContent(availabilityTable, "Verf\u00fcgbar|Available"));
    copy.setReturnDate(parseCopyReturn(getCellContent(availabilityTable, "Exemplare verliehen|Copies lent")));
    copy.setReservations(getCellContent(availabilityTable, "Reservierungen|Reservations"));

    for (final Element tr : detailTable.select("tr")) {
        // Rows without a label cell and a value cell cannot be mapped to a detail.
        if (tr.children().size() < 2) {
            continue;
        }
        final String desc = tr.child(0).text();
        final String content = tr.child(1).text();
        if (desc != null && !desc.trim().isEmpty()) {
            result.addDetail(new Detail(desc, content));
        } else if (!result.getDetails().isEmpty()) {
            // Blank label: continuation of the previous detail.
            final Detail lastDetail = result.getDetails().get(result.getDetails().size() - 1);
            lastDetail.setHtml(true);
            lastDetail.setContent(lastDetail.getContent() + "\n" + content);
        }
    }
    return result;
}

From source file:mergedoc.core.APIDocument.java

/**
 * Javadoc ? ??????/*  w w  w . j  a  v  a  2s.com*/
 * @param className ??
 * @param context 
 * @param comment 
 */
private void parseCommonTag(String className, Element element, Comment comment) {
    Elements dts = element.select("dl dt");
    for (Element dt : dts) {
        String dtText = dt.text();
        if (dtText.contains("")) {
            Elements aTags = dt.nextElementSibling().select("a:has(code)");
            for (Element a : aTags) {
                String url = a.attr("href");
                String ref;
                if (a.childNodeSize() != 1) {
                    ref = aTags.outerHtml();
                } else {
                    ref = formatClassName(className, url);
                    ref = FastStringUtils.replace(ref, "%28", "(");
                    ref = FastStringUtils.replace(ref, "%29", ")");

                    Pattern methodRefPat = PatternCache.getPattern("-(.*)-$");
                    Matcher methodRefMat = methodRefPat.matcher(ref);
                    if (methodRefMat.find()) {
                        ref = FastStringUtils.replaceAll(ref, "-(.*)-$", "($1)"); // for Java8
                        ref = FastStringUtils.replace(ref, "-", ","); // for Java8
                        ref = FastStringUtils.replace(ref, ":A", "[]"); // for Java8
                    }
                }
                comment.addSee(ref);
            }
        } else if (dtText.contains("???:")) {
            comment.addSince(dt.nextElementSibling().text());
        }
    }
}

From source file:net.parser.JobParser.java

/**
 * Returns the employer name found in a job element.
 *
 * @param jobElement element describing one job
 * @param id selects the lookup: {@code 0} reads the text of the first {@code strong} match,
 *           any other value reads the combined text of all {@code .employer} matches
 * @return the extracted text
 */
private String getEmployerName(Element jobElement, int id) {
    if (id == 0) {
        return jobElement.select("strong").eq(0).text();
    }
    return jobElement.select(".employer").text();
}

From source file:com.gumtreescraper.scraper.GumtreeScraper.java

/**
 * Decides whether an ad was posted by its owner rather than by an agency or agent.
 *
 * <p>The "for sale by" and "for rent by" spans are inspected. The ad counts as owner-posted
 * unless the first element of either span set has the text "agency" or "agent" (case-insensitive,
 * trimmed). Ads showing neither span are treated as owner-posted.
 *
 * @param adElement element describing one ad
 * @return {@code false} if either span names an agency or agent, {@code true} otherwise
 */
private boolean isOwner(Element adElement) {
    Elements[] postedBy = {
            adElement.select("span.rs-ad-attributes-forsaleby_s"),
            adElement.select("span.rs-ad-attributes-forrentby_s")
    };

    for (Elements labels : postedBy) {
        // A missing span says nothing; owner ads sometimes omit it.
        if (labels.isEmpty()) {
            continue;
        }
        String poster = labels.first().text().trim();
        if ("agency".equalsIgnoreCase(poster) || "agent".equalsIgnoreCase(poster)) {
            return false;
        }
    }
    return true;
}

From source file:de.ncoder.studipsync.studip.jsoup.JsoupStudipAdapter.java

/**
 * Loads a downloads page of the current seminar and extracts its download entries.
 *
 * <p>Each nested table matched by the selector is one entry. Its {@code td.printhead} cells
 * provide the name (link in the second cell) and the download link plus a trailing text node
 * (third cell). The nesting level is the number of {@code img} elements in the
 * {@code td.blank} cells minus three; entries above level 0 are attached to the most recent
 * entry registered one level up.
 *
 * @param downloadsUrl URL of the downloads page to navigate to
 * @param structured   only recorded in the exception context; not otherwise used here
 * @return the downloads found, in document order
 * @throws StudipException rethrown after the seminar, URL and {@code structured} flag have been
 *                         attached to it
 */
@Override
public List<Download> parseDownloads(String downloadsUrl, boolean structured) throws StudipException {
    try {
        ensureLoggedIn();
        ensureCurrentSeminarSelected();

        navigate(downloadsUrl);
        // Most recent download seen per level, used to resolve parents.
        Map<Integer, Download> stack = new HashMap<>();
        List<Download> downloads = new ArrayList<>();

        Elements rows = document
                .select("#content>table>tbody>tr:nth-of-type(2)>td:nth-of-type(2)>table>tbody>tr>td>table");
        for (org.jsoup.nodes.Element row : rows) {
            Elements content = row.select(">tbody>tr>td.printhead");
            Elements insets = row.select(">tbody>tr>td.blank img");
            // Cells at index 1 and 2 are read below, so at least three cells are required.
            if (content.size() < 3) {
                continue;
            }

            Elements info = content.get(1).select("a");
            Elements link = content.get(2).select("a[title]");
            List<TextNode> time = content.get(2).textNodes();
            if (info.isEmpty() || link.isEmpty() || time.isEmpty()) {
                continue;
            }

            Download download = Download.getDownload(link.get(0).absUrl("href"),
                    info.get(0).text().trim(),
                    time.get(time.size() - 1).text().trim().replace("\u00a0", ""), "");
            download.setSeminar(currentSeminar);

            int level = insets.size() - 3;
            if (level > 0) {
                download.setParent(stack.get(level - 1));
            } else {
                download.setLevel(level);
            }
            stack.put(download.getLevel(), download);
            downloads.add(download);
            //TODO read size, description
        }
        log.debug("Parsed " + downloads.size() + " downloads.");
        log.trace(downloads.toString());
        return downloads;
    } catch (StudipException ex) {
        ex.put("studip.seminar", currentSeminar);
        ex.put("parseDownloads.listUrl", downloadsUrl);
        ex.put("parseDownloads.structured", structured);
        throw ex;
    }
}