Example usage for org.jsoup.nodes Element nextElementSibling

List of usage examples for org.jsoup.nodes Element nextElementSibling

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.nextElementSibling().

Prototype

public Element nextElementSibling() 

Source Link

Document

Gets the next sibling element of this element, or returns null if there is no next sibling element.

Usage

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Builds the documentation blocks described by the element siblings that follow {@code elem}.
 *
 * <p>Every {@code a} sibling updates the current hash link ({@code '#'} plus its {@code name}
 * attribute); every {@code ul} sibling that contains an {@code h4} heading produces one
 * {@link DocBlock} titled with that heading and linked with the most recently seen hash link.
 * The siblings following the heading contribute the signature ({@code pre}), the description or
 * deprecation notice ({@code div.block}) and the named fields ({@code dl}).
 *
 * @param jdocBase  base location passed to {@code JDocUtil.getLink}
 * @param elem      element whose following siblings are scanned; may be {@code null}
 * @param reference class documentation passed to {@code JDocUtil.getLink}
 * @return the collected blocks (possibly empty), or {@code null} when {@code elem} is {@code null}
 */
private static List<DocBlock> getDocBlock(String jdocBase, Element elem, ClassDocumentation reference) {
    if (elem == null) {
        // kept for backward compatibility: callers receive null for a missing start element
        return null;
    }
    String baseLink = JDocUtil.getLink(jdocBase, reference);
    List<DocBlock> blocks = new ArrayList<>(10);
    String hashLink = null;
    for (elem = elem.nextElementSibling(); elem != null; elem = elem.nextElementSibling()) {
        if (elem.tagName().equals("a")) {
            hashLink = '#' + elem.attr("name");
        } else if (elem.tagName().equals("ul")) {
            Element tmp = elem.getElementsByTag("h4").first();
            if (tmp == null) {
                // a list without a heading has no title to build a block from; previously this
                // crashed with a NullPointerException on tmp.text()
                continue;
            }
            String title = JDocUtil.fixSpaces(tmp.text().trim());
            String description = "", signature = "";
            OrderedMap<String, List<String>> fields = new ListOrderedMap<>();
            // walk the heading and everything after it on the same level
            for (; tmp != null; tmp = tmp.nextElementSibling()) {
                if (tmp.tagName().equals("pre")) {
                    //contains full signature
                    signature = JDocUtil.fixSpaces(tmp.text().trim());
                } else if (tmp.tagName().equals("div") && tmp.className().equals("block")) {
                    //main block of content (description or deprecation)
                    Element deprecationElem = tmp.getElementsByClass("deprecationComment").first();
                    if (deprecationElem != null) {
                        //deprecation block
                        fields.put("Deprecated:", Collections
                                .singletonList(JDocUtil.formatText(deprecationElem.html(), baseLink)));
                    } else {
                        //description block
                        description = JDocUtil.formatText(tmp.html(), baseLink);
                    }
                } else if (tmp.tagName().equals("dl")) {
                    //a field list: each dt starts a new field, the dd entries after it are its values
                    String fieldName = null;
                    List<String> fieldValues = new ArrayList<>();
                    for (Element element : tmp.children()) {
                        if (element.tagName().equals("dt")) {
                            if (fieldName != null) {
                                // flush the previous field before starting the next one
                                fields.put(fieldName, fieldValues);
                                fieldValues = new ArrayList<>();
                            }
                            fieldName = JDocUtil.fixSpaces(element.text().trim());
                        } else if (element.tagName().equals("dd")) {
                            fieldValues.add(JDocUtil.formatText(element.html(), baseLink));
                        }
                    }
                    if (fieldName != null) {
                        // flush the last field of the list
                        fields.put(fieldName, fieldValues);
                    }
                }
            }
            blocks.add(new DocBlock(title, hashLink, signature, description, fields));
        }
    }
    return blocks;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Maps the lower-cased names of inherited methods to the lower-cased name of the class they
 * are inherited from.
 *
 * <p>Within the parent of {@code summaryAnchor}, every anchor whose {@code name} starts with
 * {@code methods.inherited.from.class} is expected to have exactly two sibling elements: an
 * {@code h3} directly after it, followed by a {@code code} element. The last child of the
 * {@code h3} names the parent class (anchors whose heading does not end in a link are skipped),
 * and every link inside the {@code code} element is one inherited method. When the same method
 * name occurs more than once, the first mapping wins.
 *
 * @param summaryAnchor anchor inside the method summary; may be {@code null}
 * @return method name to parent class name; empty when there is nothing to inspect
 * @throws RuntimeException if the HTML around an inherited-methods anchor is not shaped as
 *         described above
 */
private static Map<String, String> getInheritedMethods(Element summaryAnchor) {
    Map<String, String> inherited = new HashMap<>();
    if (summaryAnchor == null)
        return inherited;
    Element container = summaryAnchor.parent();
    if (container == null)
        return inherited; // detached anchor: nothing to search in
    Elements inheritAnchors = container.select("a[name^=\"methods.inherited.from.class\"]");
    for (Element inheritAnchor : inheritAnchors) {
        if (inheritAnchor.siblingElements().size() != 2)
            throw new RuntimeException("Got unexpected html while parsing inherited methods from class "
                    + inheritAnchor.attr("name"));
        // two siblings do not guarantee that both FOLLOW the anchor, so a missing next element
        // is reported as unexpected html instead of surfacing as a NullPointerException
        Element next = inheritAnchor.nextElementSibling();
        if (next == null || !next.tagName().equals("h3"))
            throw new RuntimeException("Got unexpected html while parsing inherited methods from class "
                    + inheritAnchor.attr("name"));
        Element sub = next.children().last();
        if (sub == null || !sub.tagName().equals("a"))
            continue;
        String parent = sub.text().toLowerCase();
        next = next.nextElementSibling();
        if (next == null || !next.tagName().equals("code"))
            throw new RuntimeException("Got unexpected html while parsing inherited methods from class "
                    + inheritAnchor.attr("name"));
        for (sub = next.children().first(); sub != null; sub = sub.nextElementSibling()) {
            if (sub.tagName().equals("a")) {
                inherited.putIfAbsent(sub.text().toLowerCase(), parent);
            }
        }
    }
    return inherited;
}

From source file:com.screenslicer.common.CommonUtil.java

/**
 * Returns the run of element siblings that directly follows the first element whose own text
 * matches {@code text}.
 *
 * <p>The first following sibling is always included; collection then continues for as long as
 * each further sibling has the same tag name (ignoring case) as the one before it.
 *
 * @param doc  document to search
 * @param text pattern handed to {@code getElementsMatchingOwnText}
 * @return the collected siblings; empty when nothing matches or the match has no next sibling
 */
public static List<Element> getNextSiblingElementsByOwnText(Document doc, String text) {
    List<Element> run = new ArrayList<Element>();
    Elements matches = doc.getElementsMatchingOwnText(text);
    if (matches == null || matches.isEmpty()) {
        return run;
    }

    Element current = matches.get(0).nextElementSibling();
    while (current != null) {
        run.add(current);
        Element following = current.nextElementSibling();
        boolean tagChanges = following != null
                && !following.tagName().equalsIgnoreCase(current.tagName());
        if (tagChanges) {
            // the run of identically tagged siblings ends here
            break;
        }
        current = following;
    }
    return run;
}

From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java

/**
 * Extracts the text content that follows each {@code name="generator"} element of the document.
 *
 * <p>For every such element the text of its following element siblings is concatenated, up to
 * (but not including) the next {@code meta} element. Content that is empty after removing
 * non-breaking spaces and trimming is dropped.
 *
 * @param htmlDoc parsed document
 * @return the non-empty content strings, in document order
 */
private ImmutableList<String> druckSachenContents(Document htmlDoc) {
    /*
     * In this way we can identify the bits of "RTF" like text inserted into the overall HTML.
     * JSoup cleans up the broken HTML removing the xml declaration and inserted html roots
     * that ALLRIS manages to put in.
     */
    Elements contentMetaElements = htmlDoc.getElementsByAttributeValue("name", "generator");
    ImmutableList.Builder<String> listBuilder = ImmutableList.builder();

    /*
     * Iterate over our candidates. Sometimes there are several.
     */
    for (Element contentMetaElement : contentMetaElements) {
        StringBuilder contentAsTextBuilder = new StringBuilder();
        Element nextSibling = contentMetaElement.nextElementSibling();

        /*
         * In the cleaned up HTML DOM returned by JSoup the "RTF" content is
         * rendered as siblings of the meta node (JSoup having removed the html, head, body
         * elements which should never have been there in the first place).
         *
         * The tag NAME must be compared here: Element.tag() returns a Tag object, which is
         * never equal to a String, so comparing it with "meta" would never end the loop at
         * the next meta element.
         */
        while (nextSibling != null && !"meta".equals(nextSibling.tagName())) {
            contentAsTextBuilder.append(nextSibling.text());
            nextSibling = nextSibling.nextElementSibling();
        }
        /*
         * Only carry over non-empty content.
         */
        String contentAsText = contentAsTextBuilder.toString();
        if (!removeNonBreakingSpacesAndTrim(contentAsText).isEmpty()) {
            listBuilder.add(contentAsText);
        }
    }

    return listBuilder.build();
}

From source file:com.johan.vertretungsplan.parser.SVPlanParser.java

/**
 * Loads all configured plan pages and converts them into a {@code Vertretungsplan}.
 *
 * <p>After running the login handler, every entry of the {@code urls} array in the school data
 * is loaded into a document. Each document must contain a {@code .svp-tabelle} table; its rows
 * become {@code Vertretung} entries grouped by class, and the paragraphs following a
 * "Mitteilungen" heading become the day's messages. Days are keyed by their date string, so a
 * later document with the same date replaces an earlier one.
 *
 * @return the plan containing one day per distinct date, in the order the dates were first seen
 * @throws IOException   if a loaded document contains no {@code .svp-tabelle} element
 * @throws JSONException declared for the JSON accessors used on the school data
 */
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); // run the login handler before loading any page

    JSONArray urls = schule.getData().getJSONArray("urls");
    String encoding = schule.getData().getString("encoding");
    List<Document> docs = new ArrayList<Document>();

    for (int i = 0; i < urls.length(); i++) {
        JSONObject url = urls.getJSONObject(i);
        loadUrl(url.getString("url"), encoding, docs);
    }

    // LinkedHashMap keeps the days in the order their dates were first encountered
    LinkedHashMap<String, VertretungsplanTag> tage = new LinkedHashMap<String, VertretungsplanTag>();
    for (Document doc : docs) {
        if (doc.select(".svp-tabelle").size() > 0) {
            VertretungsplanTag tag = new VertretungsplanTag();
            // fallback date ("unknown date") when neither a date element nor the title provides one
            String date = "Unbekanntes Datum";
            if (doc.select(".svp-plandatum-heute, .svp-plandatum-morgen").size() > 0)
                date = doc.select(".svp-plandatum-heute, .svp-plandatum-morgen").text();
            // NOTE(review): "Vertretungsplan fr " looks like "Vertretungsplan für " with a lost
            // umlaut - confirm against the real page titles before touching the literal
            else if (doc.title().startsWith("Vertretungsplan fr "))
                date = doc.title().substring("Vertretungsplan fr ".length());
            tag.setDatum(date);
            if (doc.select(".svp-uploaddatum").size() > 0)
                tag.setStand(doc.select(".svp-uploaddatum").text().replace("Aktualisierung: ", ""));

            Elements rows = doc.select(".svp-tabelle tr");
            // last lesson value seen in any row; reused for rows that do not set their own
            String lastLesson = "";
            for (Element row : rows) {
                if (row.hasClass("svp-header"))
                    continue;

                Vertretung vertretung = new Vertretung();
                List<String> affectedClasses = new ArrayList<String>();

                // the class name of each cell decides which property of the entry it fills
                for (Element column : row.select("td")) {
                    if (!hasData(column.text())) {
                        continue;
                    }
                    String type = column.className();
                    if (type.startsWith("svp-stunde")) {
                        vertretung.setLesson(column.text());
                        lastLesson = column.text();
                    } else if (type.startsWith("svp-klasse"))
                        affectedClasses = Arrays.asList(column.text().split(", "));
                    else if (type.startsWith("svp-esfehlt"))
                        vertretung.setPreviousTeacher(column.text());
                    else if (type.startsWith("svp-esvertritt"))
                        vertretung.setTeacher(column.text());
                    else if (type.startsWith("svp-fach"))
                        vertretung.setSubject(column.text());
                    else if (type.startsWith("svp-bemerkung")) {
                        vertretung.setDesc(column.text());
                        vertretung.setType(recognizeType(column.text()));
                    } else if (type.startsWith("svp-raum"))
                        vertretung.setRoom(column.text());

                    // runs after every cell that has data: if no lesson has been set yet,
                    // fall back to the lesson carried over from an earlier row
                    if (vertretung.getLesson() == null)
                        vertretung.setLesson(lastLesson);
                }

                // default type when the remark cell did not yield one
                if (vertretung.getType() == null) {
                    vertretung.setType("Vertretung");
                }

                // register the entry under every class listed in the class cell
                for (String klasse : affectedClasses) {
                    KlassenVertretungsplan kv = tag.getKlassen().get(klasse);
                    if (kv == null)
                        kv = new KlassenVertretungsplan(klasse);
                    kv.add(vertretung);
                    tag.getKlassen().put(klasse, kv);
                }
            }

            // messages: the consecutive <p> siblings directly after the "Mitteilungen" heading,
            // each split into separate messages at double line breaks
            List<String> nachrichten = new ArrayList<String>();
            if (doc.select("h2:contains(Mitteilungen)").size() > 0) {
                Element h2 = doc.select("h2:contains(Mitteilungen)").first();
                Element sibling = h2.nextElementSibling();
                while (sibling != null && sibling.tagName().equals("p")) {
                    for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText()
                            .split("<br />\\s*<br />")) {
                        if (hasData(nachricht))
                            nachrichten.add(nachricht);
                    }
                    sibling = sibling.nextElementSibling();
                }
            }
            tag.setNachrichten(nachrichten);

            tage.put(date, tag);
        } else {
            throw new IOException("keine SVPlan-Tabelle gefunden");
        }
    }
    Vertretungsplan v = new Vertretungsplan();
    v.setTage(new ArrayList<VertretungsplanTag>(tage.values()));

    return v;
}

From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java

/**
 * Extracts key/value properties from the elements carrying the CSS class {@code kb1}.
 *
 * <p>The cleaned text of each {@code kb1} element (without a trailing colon) is the key; the
 * cleaned text of its next element sibling is the value. A pair is only recorded when the
 * sibling exists, is not itself a {@code kb1} key element, and both key and value are
 * non-empty.
 *
 * @param htmlDoc parsed document
 * @return the extracted properties
 */
private ImmutableMap<String, String> druckSachenProperties(Document htmlDoc) {

    ImmutableMap.Builder<String, String> mapBuilder = ImmutableMap.builder();
    Elements keyElements = htmlDoc.getElementsByClass("kb1"); // td elements
    for (Element element : keyElements) {
        String key = removeNonBreakingSpacesAndTrim(element.text());
        if (key.endsWith(":")) {
            key = key.substring(0, key.length() - 1);
        }
        Element valueElement = element.nextElementSibling();
        // "kb1" is a CSS class (see the lookup above), not an attribute name, so the sibling
        // has to be tested with hasClass; hasAttr("kb1") never matched and let a following key
        // cell be taken as the value
        if (valueElement != null && !valueElement.hasClass("kb1")) {
            String value = removeNonBreakingSpacesAndTrim(valueElement.text());

            if ((!key.isEmpty()) && (!value.isEmpty())) {
                mapBuilder.put(key, value);
            }
        }
    }
    return mapBuilder.build();
}

From source file:gov.medicaid.screening.dao.impl.BusinessLienDAOBean.java

/**
 * Finds the first {@code dt} element whose own text contains the label and returns the text of
 * the element that follows it.
 *
 * @param elements group of elements to search in
 * @param label label to look for
 * @return text of the element after the label, or an empty string when the label is not found
 *         or has no following element
 */
private String getValuePairOfLabel(Elements elements, String label) {
    Element labelElement = elements.select("dt:containsOwn(" + label + ")").first();
    if (labelElement == null) {
        return "";
    }
    Element valueElement = labelElement.nextElementSibling();
    if (valueElement == null) {
        return "";
    }
    return valueElement.text();
}

From source file:me.vertretungsplan.parser.SVPlanParser.java

/**
 * Parses one day of an SVPlan page and adds it to the given schedule.
 *
 * <p>The day is only parsed when {@code svp} contains one of the date/title elements or the
 * document title starts with the expected prefix. Table rows become {@code Substitution}
 * entries; additional messages are taken from the {@code .LehrerVerplant},
 * {@code .Abwesenheiten} and "Mitteilungen" sections.
 *
 * @param v   schedule the parsed day is added to
 * @param svp element containing the plan of a single day
 * @param doc document {@code svp} belongs to; used for its title and header detection
 * @throws IOException if neither a date/title element nor the expected document title is found
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();
    // NOTE(review): "Vertretungsplan fr " looks like "Vertretungsplan für " with a lost umlaut -
    // confirm against the real page titles before touching the literal
    if ((svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").size() > 0
            || doc.title().startsWith("Vertretungsplan fr "))) {
        setDate(svp, doc, day);
        if (svp.select(".svp-tabelle, table:has(.Klasse)").size() > 0) {

            Elements rows = svp.select(".svp-tabelle tr, table:has(.Klasse) tr");
            // values of the most recent non-empty lesson/class cells; reused for rows whose
            // corresponding cell is empty
            String lastLesson = "";
            String lastClass = "";
            for (Element row : rows) {
                // skip header rows and rows without any text
                if ((doc.select(".svp-header").size() > 0 && row.hasClass("svp-header"))
                        || row.select("th").size() > 0 || row.text().trim().equals("")) {
                    continue;
                }

                Substitution substitution = new Substitution();

                // the class name of each cell decides which property of the entry it fills
                for (Element column : row.select("td")) {
                    String type = column.className();
                    if (!hasData(column.text())) {
                        // empty lesson/class cell: carry over the value from an earlier row
                        if ((type.startsWith("svp-stunde") || type.startsWith("Stunde"))
                                && hasData(lastLesson)) {
                            substitution.setLesson(lastLesson);
                        } else if ((type.startsWith("svp-klasse") || type.startsWith("Klasse"))
                                && hasData(lastClass)) {
                            substitution.getClasses().addAll(Arrays
                                    .asList(lastClass.split(data.optString(PARAM_CLASS_SEPARATOR, ", "))));
                        }
                        continue;
                    }
                    if (type.startsWith("svp-stunde") || type.startsWith("Stunde")) {
                        substitution.setLesson(column.text());
                        lastLesson = column.text();
                    } else if (type.startsWith("svp-klasse") || type.startsWith("Klasse")) {
                        substitution.getClasses().addAll(Arrays
                                .asList(column.text().split(data.optString(PARAM_CLASS_SEPARATOR, ", "))));
                        lastClass = column.text();
                    } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                        // teacher names are only recorded when not excluded by configuration
                        if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                            substitution.setPreviousTeacher(column.text());
                        }
                    } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                        if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                            // drop a trailing " +" from the teacher cell
                            substitution.setTeacher(column.text().replaceAll(" \\+$", ""));
                        }
                    } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                        substitution.setSubject(column.text());
                    } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                        // the remark doubles as the source for the substitution type and color
                        substitution.setDesc(column.text());
                        String recognizedType = recognizeType(column.text());
                        substitution.setType(recognizedType);
                        substitution.setColor(colorProvider.getColor(recognizedType));
                    } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                        substitution.setRoom(column.text());
                    }
                }

                // default type and color when the remark cell did not yield a type
                if (substitution.getType() == null) {
                    substitution.setType("Vertretung");
                    substitution.setColor(colorProvider.getColor("Vertretung"));
                }

                day.addSubstitution(substitution);
            }
        }
        if (svp.select(".LehrerVerplant").size() > 0) {
            day.addMessage("<b>Verplante Lehrer:</b> " + svp.select(".LehrerVerplant").text());
        }
        if (svp.select(".Abwesenheiten").size() > 0) {
            day.addMessage("<b>Abwesenheiten:</b> " + svp.select(".Abwesenheiten").text());
        }

        // messages: either the consecutive <p> siblings after a "Mitteilungen" heading or,
        // failing that, all .Mitteilungen elements; both are split at double line breaks
        if (svp.select("h2:contains(Mitteilungen)").size() > 0) {
            Element h2 = svp.select("h2:contains(Mitteilungen)").first();
            Element sibling = h2.nextElementSibling();
            while (sibling != null && sibling.tagName().equals("p")) {
                for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText()
                        .split("<br />\\s*<br />")) {
                    if (hasData(nachricht))
                        day.addMessage(nachricht);
                }
                sibling = sibling.nextElementSibling();
            }
        } else if (svp.select(".Mitteilungen").size() > 0) {
            for (Element p : svp.select(".Mitteilungen")) {
                for (String nachricht : TextNode.createFromEncoded(p.html(), null).getWholeText()
                        .split("<br />\\s*<br />")) {
                    if (hasData(nachricht))
                        day.addMessage(nachricht);
                }
            }
        }
        v.addDay(day);
    } else {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }
}

From source file:edu.harvard.iq.safe.lockss.impl.LOCKSSPlatformStatusHtmlParser.java

/**
 *
 * @param is/*ww  w  .  jav a 2  s .com*/
 */
@Override
public void getPlatformStatusData(InputStream is) {

    try {

        Document doc = DataUtil.load(is, "UTF-8", "");
        Element body = doc.body();

        // most of the target items are sandwitched by <b> tag
        // this can be used to reach each target item.
        String tmpCurrentTime = null;
        String tmpUpTime = null;
        String currentTime = null;
        Elements tags = body.getElementsByTag("b");

        for (Element tag : tags) {

            // get the current-time string: for 1.52.3 or older daemons
            // this is the ony place to get it.
            String tagText = tag.text();
            logger.log(Level.FINE, "working on tagText={0}", tagText);

            if (tagText.equals("Daemon Status")) {
                // find current time and up running
                currentTime = tag.parent().parent().text();
                logger.log(Level.INFO, "currentTime text=[{0}]", currentTime);
                // "currentTime =Daemon Status lockss.statelib.lib.in.us (usdocspln group) 01:25:55 03/01/12, up 7d5h21m"
                tmstmpMatcher = currentTimeStampPattern.matcher(currentTime);

                if (tmstmpMatcher.find()) {
                    logger.log(Level.INFO, "group 0={0}", tmstmpMatcher.group(0));
                    tmpCurrentTime = tmstmpMatcher.group(1);
                    logger.log(Level.INFO, "Current Time:group 1={0}", tmpCurrentTime);
                    tmpUpTime = tmstmpMatcher.group(2);
                    logger.log(Level.INFO, "UpTime:group 2={0}", tmpUpTime);
                }
            }

            // get the remaining key-value sets
            if (fieldNameSet.contains(tagText)) {

                Element parent = tag.parent();
                String fieldValue = parent.nextElementSibling().text();
                logger.log(Level.FINE, "{0}={1}", new Object[] { tagText, fieldValue });
                summaryInfoMap.put(tagText, fieldValue);
            }
        }

        // extract the daemon version and platform info that are located
        // at the bottom
        // these data are sandwitched by a <center> tag
        Elements ctags = body.getElementsByTag("center");
        String version = null;
        String platform = null;
        for (Element ctag : ctags) {
            String cText = ctag.text();
            logger.log(Level.FINE, "center tag Text={0}", cText);
            // cText is like this:
            // Daemon 1.53.3 built 28-Jan-12 01:06:36 on build7.lockss.org, Linux RPM 1
            if (StringUtils.isNotBlank(cText) && ctag.child(0).nodeName().equals("font")) {
                String[] versionPlatform = cText.split(", ");
                if (versionPlatform.length == 2) {
                    logger.log(Level.INFO, "daemon version={0};platform={1}", versionPlatform);
                    version = DaemonStatusDataUtil.getDaemonVersion(versionPlatform[0]);
                    platform = versionPlatform[1];
                } else {
                    // the above regex failed
                    logger.log(Level.WARNING, "String-formatting differs; use pattern matching");
                    version = DaemonStatusDataUtil.getDaemonVersion(cText);
                    int platformOffset = cText.lastIndexOf(", ") + 2;
                    platform = cText.substring(platformOffset);
                    logger.log(Level.INFO, "platform={0}", platform);

                }
            }
        }

        if (summaryInfoMap.containsKey("V3 Identity")) {
            String ipAddress = DaemonStatusDataUtil.getPeerIpAddress(summaryInfoMap.get("V3 Identity"));
            logger.log(Level.INFO, "ipAddress={0}", ipAddress);

            if (StringUtils.isNotBlank(ipAddress)) {
                boxInfoMap.put("host", ipAddress);
                if (!ipAddress.equals(summaryInfoMap.get("IP Address"))) {
                    summaryInfoMap.put("IP Address", ipAddress);
                }
            } else {
                logger.log(Level.WARNING, "host token is blank or null: use IP Address instead");
                logger.log(Level.INFO, "IP Address={0}", summaryInfoMap.get("IP Address"));
                boxInfoMap.put("host", summaryInfoMap.get("IP Address"));
            }
        }

        // for pre-1.53.3 versions
        boxInfoMap.put("time", tmpCurrentTime);
        if (!summaryInfoMap.containsKey("Current Time")) {
            summaryInfoMap.put("Current Time", tmpCurrentTime);
        }

        boxInfoMap.put("up", tmpUpTime);
        if (!summaryInfoMap.containsKey("Uptime")) {
            summaryInfoMap.put("Uptime", tmpUpTime);
        }

        boxInfoMap.put("version", version);
        if (!summaryInfoMap.containsKey("Daemon Version")) {
            summaryInfoMap.put("Daemon Version", version);
        }

        boxInfoMap.put("platform", platform);
        if (!summaryInfoMap.containsKey("Platform")) {
            summaryInfoMap.put("Platform", platform);
        }

    } catch (IOException ex) {
        logger.log(Level.SEVERE, "IO error", ex);
    }

    logger.log(Level.INFO, "boxInfoMap={0}", boxInfoMap);
    logger.log(Level.INFO, "summaryInfo={0}", summaryInfoMap);
}

From source file:net.pixomania.crawler.W3C.parser.rules.principalAuthors.PrincipalAuthorsRule1.java

/**
 * Collects the people listed in the {@code dd} elements that follow a "Principal Author"
 * {@code dt}.
 *
 * <p>A {@code dd} directly preceded by a {@code dt} that does not start with "principal author"
 * switches skipping on; skipping is switched off again at a {@code dd} whose next sibling's
 * text starts with "principal author" (that {@code dd} itself is still skipped). The HTML of
 * every remaining {@code dd} is split at commas, and each fragment is parsed into a person.
 * Links in the fragment become the person's e-mail address (if the target contains
 * {@code @}) or a website.
 *
 * @param url address of the specification, used in the warning log message only
 * @param doc parsed specification document
 * @return the parsed people, or {@code null} when no entry or no person was found
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> authors = new ArrayList<>();

    Elements candidates = doc.select("dt:contains(Principal Author) ~ dd");
    if (candidates.isEmpty()) {
        return null;
    }

    boolean skipping = false;
    for (Element candidate : candidates) {
        Element previous = candidate.previousElementSibling();
        if (previous.tagName().equals("dt")
                && !previous.text().trim().toLowerCase().startsWith("principal author")) {
            skipping = true;
        }

        if (skipping) {
            Element following = candidate.nextElementSibling();
            if (following != null
                    && following.text().trim().toLowerCase().startsWith("principal author")) {
                // the next entry belongs to a principal-author section again
                skipping = false;
            }
            continue;
        }

        for (String fragment : candidate.html().split(",")) {
            if (fragment.isEmpty()) {
                continue;
            }

            String lowered = fragment.toLowerCase();
            boolean pointsElsewhere = lowered.startsWith("(in alphabetic")
                    || lowered.startsWith("see acknowl")
                    || lowered.startsWith("the w3")
                    || lowered.startsWith("(see ac")
                    || lowered.startsWith("see participants")
                    || lowered.contains("note:");
            if (pointsElsewhere) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (fragment.equals("WHATWG:") || fragment.equals("W3C:")) {
                continue;
            }

            Document fragmentDoc = Jsoup.parse(fragment.replaceAll("\n", ""));
            Person person = NameParser.parse(fragmentDoc.text());
            if (person == null) {
                continue;
            }

            for (Element link : fragmentDoc.select("a")) {
                String href = link.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    person.setEmail(href.replace("mailto:", ""));
                } else {
                    person.addWebsite(href);
                }
            }

            authors.add(person);
        }
    }

    return authors.isEmpty() ? null : authors;
}