Example usage for org.jsoup.nodes Element tagName

List of usage examples for org.jsoup.nodes Element tagName

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element method tagName().

Prototype

public String tagName() 

Source Link

Documentation

Get the name of the tag for this element.

Usage

From source file:edu.usu.sdl.openstorefront.service.io.HelpImporter.java

/**
 * Accept a stream pointed to markdown/*from w ww.j ava 2 s  .  co  m*/
 *
 * @param in
 * @return
 */
public List<HelpSection> processHelp(InputStream in) {
    List<HelpSection> helpSections = new ArrayList<>();

    String data = "";
    try (BufferedReader bin = new BufferedReader(new InputStreamReader(in))) {
        data = bin.lines().collect(Collectors.joining("\n"));
    } catch (IOException e) {

    }

    PegDownProcessor pegDownProcessor = new PegDownProcessor(PROCESSING_TIMEOUT);
    String html = pegDownProcessor.markdownToHtml(data);
    Document doc = Jsoup.parse(html);
    Elements elements = doc.getAllElements();

    Set<String> headerTags = new HashSet<>();
    headerTags.add("h1");
    headerTags.add("h2");
    headerTags.add("h3");
    headerTags.add("h4");
    headerTags.add("h5");
    headerTags.add("h6");

    boolean capture = false;
    HelpSection helpSection = null;
    for (Element element : elements) {
        if (headerTags.contains(element.tagName().toLowerCase()) == false && capture) {
            if (helpSection != null) {
                if (helpSection.getContent().contains(element.outerHtml()) == false) {
                    helpSection.setContent(helpSection.getContent() + element.outerHtml());
                }
            }
        }

        if (headerTags.contains(element.tagName().toLowerCase())) {
            String title = element.html();

            if (helpSection != null) {
                //save old section
                addHelpSection(helpSections, helpSection);
            }

            String titleSplit[] = title.split(" ");

            helpSection = new HelpSection();
            helpSection.setTitle(title);
            helpSection.setHeaderLevel(Convert.toInteger(element.tagName().toLowerCase().replace("h", "")));
            helpSection.setSectionNumber(titleSplit[0]);
            helpSection.setContent("");

            if (title.contains("*")) {
                helpSection.setAdminSection(true);
            } else {
                helpSection.setAdminSection(false);
            }

            capture = true;
        }
    }
    //Add last section
    if (helpSection != null) {
        addHelpSection(helpSections, helpSection);
    }

    return helpSections;
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Replaces the content of every {@code code} element in the slides with syntax-highlighted
 * markup produced by the renderer registered for the element's class name.
 *
 * <p>Elements without a class name are highlighted as {@code java}. Elements for which no
 * renderer exists are left untouched. When the element's parent is a {@code pre} element, the
 * parent additionally receives the CSS class {@code code}.
 *
 * @param slidesDocument document whose {@code code} elements are rewritten in place
 * @param config supplies the character encoding used for the highlighter input and output
 */
private void renderSyntaxHighlightingHtml(final Document slidesDocument, final Configuration config) {
    for (Element code : slidesDocument.select("code")) {
        Charset encoding = config.getInputEncoding();
        String className = code.className();
        if (StringUtils.isBlank(className)) {
            className = "java";
        }
        Renderer renderer = XhtmlRendererFactory.getRenderer(className);
        if (renderer == null) {
            continue;
        }

        ByteArrayInputStream input = new ByteArrayInputStream(code.text().getBytes(encoding));
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            renderer.highlight("slidery", input, out, encoding.name(), true);
            code.html(new String(out.toByteArray(), encoding));
            code.select("br").remove();
            removeComments(code);
            code.html(code.html().trim());

            // Compare by value: reference equality on strings only works by accident.
            Element parent = code.parent();
            if (parent != null && "pre".equals(parent.tagName())) {
                parent.addClass("code");
            }
        } catch (IOException e) {
            // Best effort: a failed highlight leaves this element as it was.
            // TODO: report the failure instead of dropping it silently.
        }
    }
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds a plain-text summary of an "AlertIusse" event and passes it to a new
 * {@code RssFeedGenerator}.
 *
 * <p>The summary holds one {@code tagName : ownText} line for the first
 * {@code ns1|eventData} element and each of its descendants. Any failure (including a
 * document without such an element) is caught and its message printed to standard output.
 *
 * @param doc parsed event document
 */
private void parseIssueAlertIusse(Document doc) {
    RssFeedGenerator newRssFeedGenerator = new RssFeedGenerator();
    try {
        Elements events = doc.select("ns1|eventData");
        org.jsoup.nodes.Element event = events.get(0);

        StringBuilder content = new StringBuilder("EventName: AlertIusse\n");
        for (org.jsoup.nodes.Element element : event.getAllElements()) {
            content.append(element.tagName()).append(" : ").append(element.ownText()).append("\n");
        }

        System.out.println("content:" + content);
        newRssFeedGenerator.RssFeedXml("title", "Link", content.toString());
        System.out.println("!!!");
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:com.johan.vertretungsplan.parser.SVPlanParser.java

/**
 * Logs in, loads every configured SVPlan page and converts the pages into a
 * {@code Vertretungsplan}.
 *
 * <p>Each page becomes one {@code VertretungsplanTag}, keyed by its date string; a later page
 * with the same date string replaces the earlier one. A substitution row is attached to every
 * class named in its class column.
 *
 * @return the plan containing one day per distinct date string, in first-seen order
 * @throws IOException if a loaded page contains no {@code .svp-tabelle} element
 *         (presumably also raised by login or page loading -- verify against those helpers)
 * @throws JSONException if the school configuration lacks the expected keys
 */
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    JSONArray urls = schule.getData().getJSONArray("urls");
    String encoding = schule.getData().getString("encoding");
    List<Document> docs = new ArrayList<Document>();

    for (int i = 0; i < urls.length(); i++) {
        JSONObject url = urls.getJSONObject(i);
        loadUrl(url.getString("url"), encoding, docs);
    }

    // The u-umlaut is written as an escape so the literal survives any source-file encoding.
    final String titlePrefix = "Vertretungsplan f\u00fcr ";

    LinkedHashMap<String, VertretungsplanTag> tage = new LinkedHashMap<String, VertretungsplanTag>();
    for (Document doc : docs) {
        if (doc.select(".svp-tabelle").isEmpty()) {
            throw new IOException("keine SVPlan-Tabelle gefunden");
        }

        VertretungsplanTag tag = new VertretungsplanTag();

        // Prefer the explicit date element; fall back to the page title.
        String date = "Unbekanntes Datum";
        Elements dateElements = doc.select(".svp-plandatum-heute, .svp-plandatum-morgen");
        if (!dateElements.isEmpty()) {
            date = dateElements.text();
        } else if (doc.title().startsWith(titlePrefix)) {
            date = doc.title().substring(titlePrefix.length());
        }
        tag.setDatum(date);

        Elements uploadDate = doc.select(".svp-uploaddatum");
        if (!uploadDate.isEmpty()) {
            tag.setStand(uploadDate.text().replace("Aktualisierung: ", ""));
        }

        // Rows without a lesson cell inherit the lesson of the last row that had one.
        String lastLesson = "";
        for (Element row : doc.select(".svp-tabelle tr")) {
            if (row.hasClass("svp-header")) {
                continue;
            }

            Vertretung vertretung = new Vertretung();
            List<String> affectedClasses = new ArrayList<String>();

            for (Element column : row.select("td")) {
                String text = column.text();
                if (!hasData(text)) {
                    continue;
                }
                String type = column.className();
                if (type.startsWith("svp-stunde")) {
                    vertretung.setLesson(text);
                    lastLesson = text;
                } else if (type.startsWith("svp-klasse")) {
                    affectedClasses = Arrays.asList(text.split(", "));
                } else if (type.startsWith("svp-esfehlt")) {
                    vertretung.setPreviousTeacher(text);
                } else if (type.startsWith("svp-esvertritt")) {
                    vertretung.setTeacher(text);
                } else if (type.startsWith("svp-fach")) {
                    vertretung.setSubject(text);
                } else if (type.startsWith("svp-bemerkung")) {
                    vertretung.setDesc(text);
                    vertretung.setType(recognizeType(text));
                } else if (type.startsWith("svp-raum")) {
                    vertretung.setRoom(text);
                }

                if (vertretung.getLesson() == null) {
                    vertretung.setLesson(lastLesson);
                }
            }

            if (vertretung.getType() == null) {
                vertretung.setType("Vertretung");
            }

            // The same substitution object is registered for every affected class.
            for (String klasse : affectedClasses) {
                KlassenVertretungsplan kv = tag.getKlassen().get(klasse);
                if (kv == null) {
                    kv = new KlassenVertretungsplan(klasse);
                }
                kv.add(vertretung);
                tag.getKlassen().put(klasse, kv);
            }
        }

        // Messages are the paragraphs directly following the "Mitteilungen" heading; blank
        // lines (two consecutive line breaks) separate individual messages inside a paragraph.
        List<String> nachrichten = new ArrayList<String>();
        Element h2 = doc.select("h2:contains(Mitteilungen)").first();
        if (h2 != null) {
            Element sibling = h2.nextElementSibling();
            while (sibling != null && sibling.tagName().equals("p")) {
                for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText()
                        .split("<br />\\s*<br />")) {
                    if (hasData(nachricht)) {
                        nachrichten.add(nachricht);
                    }
                }
                sibling = sibling.nextElementSibling();
            }
        }
        tag.setNachrichten(nachrichten);

        tage.put(date, tag);
    }

    Vertretungsplan v = new Vertretungsplan();
    v.setTage(new ArrayList<VertretungsplanTag>(tage.values()));

    return v;
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.version.VersionEditorRule1.java

/**
 * Extracts the editors listed under the "version 1" definition terms of a spec page.
 *
 * <p>Candidate elements are every {@code dt} containing "version 1" and the {@code dd}
 * elements following such a term. A {@code dd} may hold one editor, or several separated by
 * line breaks. Links inside an entry become the person's e-mail address (when the href
 * contains {@code @}) or one of the person's websites.
 *
 * @param url spec URL; only used in warning log messages
 * @param doc parsed spec document
 * @return the editors found, or {@code null} when no candidate element exists or no editor
 *         could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(version 1), dt:contains(version 1) ~ dd");
    if (editors.size() == 0) {
        return null;
    }

    boolean skip = false;
    String version = "";
    for (Element editor : editors) {
        Element prev = editor.previousElementSibling();
        if (prev != null) {
            // A preceding term that is not a "version 1" heading starts a foreign section.
            if (prev.tagName().equals("dt") && !isVersionOneHeading(prev.text())) {
                skip = true;
            }

            if (skip) {
                // Stop skipping once the next sibling is a "version 1" heading again; the
                // current element is skipped either way.
                Element next = editor.nextElementSibling();
                if (next != null && isVersionOneHeading(next.text())) {
                    skip = false;
                }
                continue;
            }
        }

        if (editor.tagName().equals("dt")) {
            version = editor.text();
            continue;
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");

        if (splitted.length < 2) {
            // Single entry: rejected entries skip the end-of-section check below.
            if (isNotAnEditorEntry(url, editor.text())) {
                continue;
            }
            Person result = buildPerson(editor.text(), editor, version);
            if (result == null) {
                continue;
            }
            editorList.add(result);
        } else {
            // Several entries separated by line breaks; each fragment is parsed on its own.
            for (String split : splitted) {
                if (split.isEmpty() || isNotAnEditorEntry(url, split)) {
                    continue;
                }
                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = buildPerson(newdoc.text(), newdoc, version);
                if (result != null) {
                    editorList.add(result);
                }
            }
        }

        // A following term other than "editors (version 1" ends the section.
        Element next = editor.nextElementSibling();
        if (next != null && next.tagName().equals("dt")
                && !next.text().trim().toLowerCase().startsWith("editors (version 1")) {
            break;
        }
    }

    if (editorList.size() == 0) {
        return null;
    }

    return editorList;
}

/** Tells whether the trimmed, lower-cased text starts like a "version 1" heading. */
private boolean isVersionOneHeading(String text) {
    String lower = text.trim().toLowerCase();
    return lower.startsWith("version 1") || lower.startsWith("editors (version 1");
}

/**
 * Tells whether an entry is a remark or organisation label rather than an editor; logs a
 * warning when the entry looks like a pointer to another section.
 */
private boolean isNotAnEditorEntry(String url, String entry) {
    String lower = entry.toLowerCase();
    if (lower.startsWith("(in alphabetic")
            || lower.startsWith("see acknowl")
            || lower.startsWith("the w3")
            || lower.startsWith("(see ac")
            || lower.startsWith("see participants")
            || lower.contains("note:")) {
        Log.log("warning", "Spec " + url + " may refer to a different section!");
        return true;
    }
    return entry.equals("WHATWG:") || entry.equals("W3C:");
}

/**
 * Parses a person from the given name text and attaches the version and the links found
 * below {@code linkRoot}; returns {@code null} when the name cannot be parsed.
 */
private Person buildPerson(String nameText, Element linkRoot, String version) {
    Person result = NameParser.parse(nameText);
    if (result == null) {
        return null;
    }

    result.setVersion(version);
    for (Element anchor : linkRoot.select("a")) {
        String href = anchor.attr("href");
        if (href.isEmpty()) {
            continue;
        }
        if (href.contains("@")) {
            result.setEmail(href.replace("mailto:", ""));
        } else {
            result.addWebsite(href);
        }
    }
    return result;
}

From source file:org.apache.sling.hapi.client.forms.internal.FormValues.java

/**
 * Collects the name/value pairs of the form's submittable controls into {@code list}.
 *
 * <p>Skipped are: disabled controls, buttons that are not of type {@code submit}, inputs of
 * type {@code button} or {@code reset}, unchecked checkboxes and radio buttons, controls
 * without a name, controls inside a {@code datalist}, and {@code image} / {@code file}
 * inputs (not supported yet).
 *
 * @return this instance
 * @see <a href="http://www.w3.org/TR/html5/forms.html#constructing-the-form-data-set">
 *      HTML5: constructing the form data set</a>
 */
private FormValues build() {
    for (Element input : form.select("button, input, select, textarea")) {
        String type = input.attr("type");

        if (input.hasAttr("disabled")) {
            continue;
        }
        if (input.tagName().equalsIgnoreCase("button") && !type.equals("submit")) {
            continue;
        }
        if (input.tagName().equalsIgnoreCase("input") && (type.equals("button") || type.equals("reset"))) {
            continue;
        }
        // Only checked checkboxes and radio buttons belong to the data set.
        if ((type.equals("checkbox") || type.equals("radio")) && !input.hasAttr("checked")) {
            continue;
        }
        if (!type.equals("image") && input.attr("name").length() == 0) {
            continue;
        }
        if (input.parents().is("datalist")) {
            continue;
        }
        if (type.equals("image") || type.equals("file")) {
            continue; // don't support files for now
        }

        String name = input.attr("name");

        if (input.tagName().equalsIgnoreCase("select")) {
            // One pair per selected, enabled option.
            for (Element o : input.select("option[selected]")) {
                if (o.hasAttr("disabled")) {
                    continue;
                }
                list.add(name, new BasicNameValuePair(name, o.val()));
            }
        } else if (type.equals("checkbox") || type.equals("radio")) {
            // A checked control without a value attribute submits "on".
            String value = input.hasAttr("value") ? input.val() : "on";
            list.add(name, new BasicNameValuePair(name, value));
        } else {
            list.add(name, new BasicNameValuePair(name, input.val()));
        }
    }
    return this;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Turns an "IdentityRecommendation" event document into a {@code Message}.
 *
 * <p>The pattern id is read from the first {@code p|patternId} element and looked up through
 * {@code patternDAO}. The message content lists {@code tagName : ownText} for the first
 * {@code ns1|eventData} element and each of its descendants.
 *
 * @param doc parsed event document
 * @return the message; {@code null} when the pattern is unknown or the lookup fails. If a
 *         failure occurs after the message was created, the partially filled message is
 *         returned.
 */
private Message parseIdentityRecommendation(Document doc) {
    Message result = null;
    try {
        final String patternId = doc.select("p|patternId").get(0).text();
        final Pattern matched = patternDAO.findById(patternId);
        if (matched == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
            return result;
        }

        result = new Message();
        result.setPatternId(matched);

        final StringBuilder body = new StringBuilder("EventName: IdentityRecommendation\n");
        for (org.jsoup.nodes.Element node : doc.select("ns1|eventData").get(0).getAllElements()) {
            body.append(node.tagName()).append(" : ").append(node.ownText()).append("\n");
        }

        result.setSubject("Identity Recommendation");
        result.setSummary("default summary");
        result.setContent(body.toString());
        result.setMsgDate(new Date());
        result.setMsgID(1);
    } catch (Exception e) {
        // Failures are reported on standard output only.
        System.out.println(e.getMessage());
    }
    return result;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Turns an "IdentityVerification" event document into a {@code Message}.
 *
 * <p>The pattern id is read from the first {@code p|patternId} element and looked up through
 * {@code patternDAO}. The message content lists {@code tagName : ownText} for the first
 * {@code ns1|eventData} element and each of its descendants.
 *
 * @param doc parsed event document
 * @return the message; {@code null} when the pattern is unknown or the lookup fails. If a
 *         failure occurs after the message was created, the partially filled message is
 *         returned.
 */
public Message parseIdentityVerification(Document doc) {
    Message result = null;
    try {
        final String patternId = doc.select("p|patternId").get(0).text();
        final Pattern matched = patternDAO.findById(patternId);
        if (matched == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
            return result;
        }

        result = new Message();
        result.setPatternId(matched);

        final StringBuilder body = new StringBuilder("EventName: IdentityVerification\n");
        for (org.jsoup.nodes.Element node : doc.select("ns1|eventData").get(0).getAllElements()) {
            body.append(node.tagName()).append(" : ").append(node.ownText()).append("\n");
        }

        result.setSubject("Identity Verification");
        result.setSummary("default summary");
        result.setContent(body.toString());
        result.setMsgDate(new Date());
        result.setMsgID(1);
    } catch (Exception e) {
        // Failures are reported on standard output only.
        System.out.println(e.getMessage());
    }
    return result;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Turns an "IssueRecommendation" event document into a {@code Message}.
 *
 * <p>The pattern id is read from the first {@code p|patternId} element and looked up through
 * {@code patternDAO}. The message content lists {@code tagName : ownText} for the first
 * {@code ns1|eventData} element and each of its descendants.
 *
 * @param doc parsed event document
 * @return the message; {@code null} when the pattern is unknown or the lookup fails. If a
 *         failure occurs after the message was created, the partially filled message is
 *         returned.
 */
public Message parseIssueRecommendation(Document doc) {
    Message result = null;
    try {
        final String patternId = doc.select("p|patternId").get(0).text();
        final Pattern matched = patternDAO.findById(patternId);
        if (matched == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
            return result;
        }

        result = new Message();
        result.setPatternId(matched);

        final StringBuilder body = new StringBuilder("EventName: IssueRecommendation\n");
        for (org.jsoup.nodes.Element node : doc.select("ns1|eventData").get(0).getAllElements()) {
            body.append(node.tagName()).append(" : ").append(node.ownText()).append("\n");
        }

        result.setSubject("Issue Recommendation");
        result.setSummary("default summary");
        result.setContent(body.toString());
        result.setMsgDate(new Date());
        result.setMsgID(1);
    } catch (Exception e) {
        // Failures are reported on standard output only.
        System.out.println(e.getMessage());
    }
    return result;
}

From source file:me.vertretungsplan.parser.SVPlanParser.java

/**
 * Parses one SVPlan day and adds it to the schedule.
 *
 * <p>A day is recognised by a date element ({@code .svp-plandatum-heute},
 * {@code .svp-plandatum-morgen} or {@code .Titel}) or by the document title. Substitutions
 * are read from the rows of {@code .svp-tabelle} or of a table containing {@code .Klasse};
 * empty lesson and class cells inherit the value of the last row that had one. Planned
 * teachers, absences and messages are added to the day as messages.
 *
 * @param v schedule that receives the parsed day
 * @param svp element containing the day's markup
 * @param doc document {@code svp} was taken from; used for the title and header detection
 * @throws IOException if neither a date element nor a matching title is present
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    // The u-umlaut is written as an escape so the literal survives any source-file encoding.
    boolean hasDate = svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").size() > 0
            || doc.title().startsWith("Vertretungsplan f\u00fcr ");
    if (!hasDate) {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }

    setDate(svp, doc, day);
    if (svp.select(".svp-tabelle, table:has(.Klasse)").size() > 0) {
        // Both values are the same for every row, so look them up once.
        String classSeparator = data.optString(PARAM_CLASS_SEPARATOR, ", ");
        boolean docUsesHeaderClass = doc.select(".svp-header").size() > 0;

        String lastLesson = "";
        String lastClass = "";
        for (Element row : svp.select(".svp-tabelle tr, table:has(.Klasse) tr")) {
            if ((docUsesHeaderClass && row.hasClass("svp-header"))
                    || row.select("th").size() > 0 || row.text().trim().equals("")) {
                continue;
            }

            Substitution substitution = new Substitution();

            for (Element column : row.select("td")) {
                String type = column.className();
                String text = column.text();
                boolean isLesson = type.startsWith("svp-stunde") || type.startsWith("Stunde");
                boolean isClass = type.startsWith("svp-klasse") || type.startsWith("Klasse");

                if (!hasData(text)) {
                    // Empty lesson/class cells repeat the value of the previous row.
                    if (isLesson && hasData(lastLesson)) {
                        substitution.setLesson(lastLesson);
                    } else if (isClass && hasData(lastClass)) {
                        substitution.getClasses().addAll(Arrays.asList(lastClass.split(classSeparator)));
                    }
                    continue;
                }

                if (isLesson) {
                    substitution.setLesson(text);
                    lastLesson = text;
                } else if (isClass) {
                    substitution.getClasses().addAll(Arrays.asList(text.split(classSeparator)));
                    lastClass = text;
                } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        substitution.setPreviousTeacher(text);
                    }
                } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        // Drop a trailing " +" marker from the teacher name.
                        substitution.setTeacher(text.replaceAll(" \\+$", ""));
                    }
                } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                    substitution.setSubject(text);
                } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                    substitution.setDesc(text);
                    String recognizedType = recognizeType(text);
                    substitution.setType(recognizedType);
                    substitution.setColor(colorProvider.getColor(recognizedType));
                } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                    substitution.setRoom(text);
                }
            }

            if (substitution.getType() == null) {
                substitution.setType("Vertretung");
                substitution.setColor(colorProvider.getColor("Vertretung"));
            }

            day.addSubstitution(substitution);
        }
    }

    if (svp.select(".LehrerVerplant").size() > 0) {
        day.addMessage("<b>Verplante Lehrer:</b> " + svp.select(".LehrerVerplant").text());
    }
    if (svp.select(".Abwesenheiten").size() > 0) {
        day.addMessage("<b>Abwesenheiten:</b> " + svp.select(".Abwesenheiten").text());
    }

    // Messages follow either a "Mitteilungen" heading (as consecutive paragraphs) or live in
    // elements carrying the class "Mitteilungen".
    Element h2 = svp.select("h2:contains(Mitteilungen)").first();
    if (h2 != null) {
        Element sibling = h2.nextElementSibling();
        while (sibling != null && sibling.tagName().equals("p")) {
            addSvPlanMessages(day, sibling);
            sibling = sibling.nextElementSibling();
        }
    } else {
        for (Element p : svp.select(".Mitteilungen")) {
            addSvPlanMessages(day, p);
        }
    }
    v.addDay(day);
}

/**
 * Splits the HTML of {@code source} at blank lines (two consecutive line breaks) and adds
 * every part that carries data to the day as a message.
 */
private void addSvPlanMessages(SubstitutionScheduleDay day, Element source) {
    for (String nachricht : TextNode.createFromEncoded(source.html(), null).getWholeText()
            .split("<br />\\s*<br />")) {
        if (hasData(nachricht)) {
            day.addMessage(nachricht);
        }
    }
}