List of usage examples for org.jsoup.nodes.Element#tagName()
public String tagName()
From source file:edu.usu.sdl.openstorefront.service.io.HelpImporter.java
/** * Accept a stream pointed to markdown/*from w ww.j ava 2 s . co m*/ * * @param in * @return */ public List<HelpSection> processHelp(InputStream in) { List<HelpSection> helpSections = new ArrayList<>(); String data = ""; try (BufferedReader bin = new BufferedReader(new InputStreamReader(in))) { data = bin.lines().collect(Collectors.joining("\n")); } catch (IOException e) { } PegDownProcessor pegDownProcessor = new PegDownProcessor(PROCESSING_TIMEOUT); String html = pegDownProcessor.markdownToHtml(data); Document doc = Jsoup.parse(html); Elements elements = doc.getAllElements(); Set<String> headerTags = new HashSet<>(); headerTags.add("h1"); headerTags.add("h2"); headerTags.add("h3"); headerTags.add("h4"); headerTags.add("h5"); headerTags.add("h6"); boolean capture = false; HelpSection helpSection = null; for (Element element : elements) { if (headerTags.contains(element.tagName().toLowerCase()) == false && capture) { if (helpSection != null) { if (helpSection.getContent().contains(element.outerHtml()) == false) { helpSection.setContent(helpSection.getContent() + element.outerHtml()); } } } if (headerTags.contains(element.tagName().toLowerCase())) { String title = element.html(); if (helpSection != null) { //save old section addHelpSection(helpSections, helpSection); } String titleSplit[] = title.split(" "); helpSection = new HelpSection(); helpSection.setTitle(title); helpSection.setHeaderLevel(Convert.toInteger(element.tagName().toLowerCase().replace("h", ""))); helpSection.setSectionNumber(titleSplit[0]); helpSection.setContent(""); if (title.contains("*")) { helpSection.setAdminSection(true); } else { helpSection.setAdminSection(false); } capture = true; } } //Add last section if (helpSection != null) { addHelpSection(helpSections, helpSection); } return helpSections; }
From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java
private void renderSyntaxHighlightingHtml(final Document slidesDocument, final Configuration config) { for (Element code : slidesDocument.select("code")) { Charset encoding = config.getInputEncoding(); ByteArrayInputStream input = new ByteArrayInputStream(code.text().getBytes(encoding)); ByteArrayOutputStream out = new ByteArrayOutputStream(); String className = code.className(); if (StringUtils.isBlank(className)) { className = "java"; }/*from w w w . j av a2s . co m*/ Renderer renderer = XhtmlRendererFactory.getRenderer(className); if (renderer != null) { try { renderer.highlight("slidery", input, out, encoding.name(), true); code.html(new String(out.toByteArray(), encoding)); code.select("br").remove(); removeComments(code); code.html(code.html().trim()); Element parent = code.parent(); if (parent.tagName() == "pre") { parent.addClass("code"); } } catch (IOException e) { // TODO: Handle exception } } } }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
private void parseIssueAlertIusse(Document doc) { // TODO Auto-generated method stub Message message = null;//from w w w . j a v a 2s. c o m RssFeedGenerator newRssFeedGenerator = new RssFeedGenerator(); try { String content = "EventName: AlertIusse\n"; message = new Message(); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } System.out.println("content:" + content); newRssFeedGenerator.RssFeedXml("title", "Link", content); System.out.println("!!!"); } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:com.johan.vertretungsplan.parser.SVPlanParser.java
public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); // JSONArray urls = schule.getData().getJSONArray("urls"); String encoding = schule.getData().getString("encoding"); List<Document> docs = new ArrayList<Document>(); for (int i = 0; i < urls.length(); i++) { JSONObject url = urls.getJSONObject(i); loadUrl(url.getString("url"), encoding, docs); }//from ww w . j a v a2 s .c om LinkedHashMap<String, VertretungsplanTag> tage = new LinkedHashMap<String, VertretungsplanTag>(); for (Document doc : docs) { if (doc.select(".svp-tabelle").size() > 0) { VertretungsplanTag tag = new VertretungsplanTag(); String date = "Unbekanntes Datum"; if (doc.select(".svp-plandatum-heute, .svp-plandatum-morgen").size() > 0) date = doc.select(".svp-plandatum-heute, .svp-plandatum-morgen").text(); else if (doc.title().startsWith("Vertretungsplan fr ")) date = doc.title().substring("Vertretungsplan fr ".length()); tag.setDatum(date); if (doc.select(".svp-uploaddatum").size() > 0) tag.setStand(doc.select(".svp-uploaddatum").text().replace("Aktualisierung: ", "")); Elements rows = doc.select(".svp-tabelle tr"); String lastLesson = ""; for (Element row : rows) { if (row.hasClass("svp-header")) continue; Vertretung vertretung = new Vertretung(); List<String> affectedClasses = new ArrayList<String>(); for (Element column : row.select("td")) { if (!hasData(column.text())) { continue; } String type = column.className(); if (type.startsWith("svp-stunde")) { vertretung.setLesson(column.text()); lastLesson = column.text(); } else if (type.startsWith("svp-klasse")) affectedClasses = Arrays.asList(column.text().split(", ")); else if (type.startsWith("svp-esfehlt")) vertretung.setPreviousTeacher(column.text()); else if (type.startsWith("svp-esvertritt")) vertretung.setTeacher(column.text()); else if (type.startsWith("svp-fach")) vertretung.setSubject(column.text()); else if 
(type.startsWith("svp-bemerkung")) { vertretung.setDesc(column.text()); vertretung.setType(recognizeType(column.text())); } else if (type.startsWith("svp-raum")) vertretung.setRoom(column.text()); if (vertretung.getLesson() == null) vertretung.setLesson(lastLesson); } if (vertretung.getType() == null) { vertretung.setType("Vertretung"); } for (String klasse : affectedClasses) { KlassenVertretungsplan kv = tag.getKlassen().get(klasse); if (kv == null) kv = new KlassenVertretungsplan(klasse); kv.add(vertretung); tag.getKlassen().put(klasse, kv); } } List<String> nachrichten = new ArrayList<String>(); if (doc.select("h2:contains(Mitteilungen)").size() > 0) { Element h2 = doc.select("h2:contains(Mitteilungen)").first(); Element sibling = h2.nextElementSibling(); while (sibling != null && sibling.tagName().equals("p")) { for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText() .split("<br />\\s*<br />")) { if (hasData(nachricht)) nachrichten.add(nachricht); } sibling = sibling.nextElementSibling(); } } tag.setNachrichten(nachrichten); tage.put(date, tag); } else { throw new IOException("keine SVPlan-Tabelle gefunden"); } } Vertretungsplan v = new Vertretungsplan(); v.setTage(new ArrayList<VertretungsplanTag>(tage.values())); return v; }
From source file:net.pixomania.crawler.W3C.parser.rules.editors.version.VersionEditorRule1.java
@Override public ArrayList<Person> run(String url, Document doc) { ArrayList<Person> editorList = new ArrayList<>(); Elements editors = doc.select("dt:contains(version 1), dt:contains(version 1) ~ dd"); if (editors.size() == 0) return null; boolean skip = false; String version = ""; for (Element editor : editors) { Element prev = editor.previousElementSibling(); if (prev != null) { if (prev.tagName().equals("dt")) { if (!prev.text().trim().toLowerCase().startsWith("version 1") && !prev.text().trim().toLowerCase().startsWith("editors (version 1")) { skip = true;//from ww w . ja va 2s . c o m } } if (skip) { Element next = editor.nextElementSibling(); if (next != null) { if (next.text().trim().toLowerCase().startsWith("version 1") || next.text().trim().toLowerCase().startsWith("editors (version 1")) { skip = false; continue; } } continue; } } if (editor.tagName().equals("dt")) { version = editor.text(); continue; } String[] splitted = editor.html().split("<br />|<br clear=\"none\" />"); if (splitted.length < 2) { if (editor.text().toLowerCase().startsWith("(in alphabetic") || editor.text().toLowerCase().startsWith("see acknowl") || editor.text().toLowerCase().startsWith("the w3") || editor.text().toLowerCase().startsWith("(see ac") || editor.text().toLowerCase().startsWith("see participants") || editor.text().toLowerCase().contains("note:")) { Log.log("warning", "Spec " + url + " may refer to a different section!"); continue; } if (editor.text().equals("WHATWG:") || editor.text().equals("W3C:")) continue; Person result = NameParser.parse(editor.text()); if (result == null) continue; result.setVersion(version); for (int i = 0; i < editor.select("a").size(); i++) { if (!editor.select("a").get(i).attr("href").isEmpty()) { if (editor.select("a").get(i).attr("href").contains("@")) { result.setEmail(editor.select("a").get(i).attr("href").replace("mailto:", "")); } else { result.addWebsite(editor.select("a").get(i).attr("href")); } } } editorList.add(result); } else { for 
(String split : splitted) { if (!split.isEmpty()) { if (split.toLowerCase().startsWith("(in alphabetic") || split.toLowerCase().startsWith("see acknowl") || split.toLowerCase().startsWith("the w3") || split.toLowerCase().startsWith("(see ac") || split.toLowerCase().startsWith("see participants") || split.toLowerCase().contains("note:")) { Log.log("warning", "Spec " + url + " may refer to a different section!"); continue; } if (split.equals("WHATWG:") || split.equals("W3C:")) continue; Document newdoc = Jsoup.parse(split.replaceAll("\n", "")); Person result = NameParser.parse(newdoc.text()); if (result == null) continue; result.setVersion(version); for (int i = 0; i < newdoc.select("a").size(); i++) { if (!newdoc.select("a").get(i).attr("href").isEmpty()) { if (newdoc.select("a").get(i).attr("href").contains("@")) { result.setEmail(newdoc.select("a").get(i).attr("href").replace("mailto:", "")); } else { result.addWebsite(newdoc.select("a").get(i).attr("href")); } } } editorList.add(result); } } } Element next = editor.nextElementSibling(); if (next != null) if (next.tag().getName().equals("dt") && !next.text().trim().toLowerCase().startsWith("editors (version 1")) break; } if (editorList.size() == 0) return null; return editorList; }
From source file:org.apache.sling.hapi.client.forms.internal.FormValues.java
/** * @return//from ww w. j av a 2 s .c o m * {@see http://www.w3.org/TR/html5/forms.html#constructing-the-form-data-set} */ private FormValues build() { for (Element input : form.select("button, input, select, textarea")) { String type = input.attr("type"); if (input.hasAttr("disabled")) continue; if (input.tagName().equalsIgnoreCase("button") && !type.equals("submit")) continue; if (input.tagName().equalsIgnoreCase("input") && (type.equals("button") || type.equals("reset"))) continue; if (type.equals("checkbox") && input.hasAttr("checked")) continue; if (type.equals("radio") && input.hasAttr("checked")) continue; if (!type.equals("image") && input.attr("name").length() == 0) continue; if (input.parents().is("datalist")) continue; if (type.equals("image") || type.equals("file")) continue; // don't support files for now String name = input.attr("name"); if (input.tagName().equalsIgnoreCase("select")) { for (Element o : input.select("option[selected]")) { if (o.hasAttr("disabled")) continue; list.add(name, new BasicNameValuePair(name, o.val())); } } else if (type.equals("checkbox") || type.equals("radio")) { String value = input.hasAttr("value") ? input.val() : "on"; list.add(name, new BasicNameValuePair(name, value)); } else { list.add(name, new BasicNameValuePair(name, input.val())); } } return this; }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
private Message parseIdentityRecommendation(Document doc) { Message message = null;//from ww w . j a va 2 s .c o m try { Elements pID = doc.select("p|patternId"); String patternId = pID.get(0).text(); // String message = doc.select(query); Pattern pattern; pattern = patternDAO.findById(patternId); String content = "EventName: IdentityRecommendation\n"; if (pattern != null) { message = new Message(); message.setPatternId(pattern); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } message.setSubject("Identity Recommendation"); message.setSummary("default summary"); message.setContent(content); message.setMsgDate(new Date()); message.setMsgID(1); } else System.out.println("can't find patternID of the complex event:" + patternId); } catch (Exception e) { System.out.println(e.getMessage()); } return message; }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
public Message parseIdentityVerification(Document doc) { Message message = null;/*w w w. j a v a 2 s. com*/ try { Elements pID = doc.select("p|patternId"); String patternId = pID.get(0).text(); // String message = doc.select(query); Pattern pattern; pattern = patternDAO.findById(patternId); String content = "EventName: IdentityVerification\n"; if (pattern != null) { message = new Message(); message.setPatternId(pattern); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } message.setSubject("Identity Verification"); message.setSummary("default summary"); message.setContent(content); message.setMsgDate(new Date()); message.setMsgID(1); } else System.out.println("can't find patternID of the complex event:" + patternId); } catch (Exception e) { System.out.println(e.getMessage()); } return message; }
From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java
public Message parseIssueRecommendation(Document doc) { Message message = null;/*from w ww. j a v a 2 s .com*/ try { Elements pID = doc.select("p|patternId"); String patternId = pID.get(0).text(); // String message = doc.select(query); Pattern pattern; pattern = patternDAO.findById(patternId); String content = "EventName: IssueRecommendation\n"; if (pattern != null) { message = new Message(); message.setPatternId(pattern); Elements events = doc.select("ns1|eventData"); org.jsoup.nodes.Element event = events.get(0); Elements elements = event.getAllElements(); for (org.jsoup.nodes.Element element : elements) { content = content + element.tagName() + " : " + element.ownText() + "\n"; } message.setSubject("Issue Recommendation"); message.setSummary("default summary"); message.setContent(content); message.setMsgDate(new Date()); message.setMsgID(1); } else System.out.println("can't find patternID of the complex event:" + patternId); } catch (Exception e) { System.out.println(e.getMessage()); } return message; }
From source file:me.vertretungsplan.parser.SVPlanParser.java
/**
 * Parses one day of an SVPlan page and adds it to the schedule.
 *
 * <p>Reads the substitution table (either the {@code svp-*} class layout or
 * the one using {@code Klasse}/{@code Stunde}/... classes), then the teacher
 * and absence notes, then the "Mitteilungen" messages.
 *
 * @param v   schedule receiving the parsed day
 * @param svp element containing the day's markup
 * @param doc whole document, consulted for its title and the header-row class
 * @throws IOException if neither a date element nor a matching page title is found
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    boolean hasDateElement = !svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").isEmpty();
    if (!hasDateElement && !doc.title().startsWith("Vertretungsplan fr ")) {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }

    setDate(svp, doc, day);

    if (!svp.select(".svp-tabelle, table:has(.Klasse)").isEmpty()) {
        Elements rows = svp.select(".svp-tabelle tr, table:has(.Klasse) tr");
        boolean docUsesHeaderClass = !doc.select(".svp-header").isEmpty();
        String lastLesson = "";
        String lastClass = "";

        for (Element row : rows) {
            boolean isHeaderRow = docUsesHeaderClass && row.hasClass("svp-header");
            if (isHeaderRow || !row.select("th").isEmpty() || row.text().trim().equals("")) {
                continue;
            }

            Substitution substitution = new Substitution();
            for (Element cell : row.select("td")) {
                String type = cell.className();
                boolean isLesson = type.startsWith("svp-stunde") || type.startsWith("Stunde");
                boolean isClass = type.startsWith("svp-klasse") || type.startsWith("Klasse");

                if (!hasData(cell.text())) {
                    // Empty lesson/class cells repeat the value of the previous row.
                    if (isLesson && hasData(lastLesson)) {
                        substitution.setLesson(lastLesson);
                    } else if (isClass && hasData(lastClass)) {
                        String separator = data.optString(PARAM_CLASS_SEPARATOR, ", ");
                        substitution.getClasses().addAll(Arrays.asList(lastClass.split(separator)));
                    }
                    continue;
                }

                if (isLesson) {
                    substitution.setLesson(cell.text());
                    lastLesson = cell.text();
                } else if (isClass) {
                    String separator = data.optString(PARAM_CLASS_SEPARATOR, ", ");
                    substitution.getClasses().addAll(Arrays.asList(cell.text().split(separator)));
                    lastClass = cell.text();
                } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        substitution.setPreviousTeacher(cell.text());
                    }
                } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        // A trailing " +" is stripped from the teacher text.
                        substitution.setTeacher(cell.text().replaceAll(" \\+$", ""));
                    }
                } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                    substitution.setSubject(cell.text());
                } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                    substitution.setDesc(cell.text());
                    String recognizedType = recognizeType(cell.text());
                    substitution.setType(recognizedType);
                    substitution.setColor(colorProvider.getColor(recognizedType));
                } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                    substitution.setRoom(cell.text());
                }
            }

            if (substitution.getType() == null) {
                substitution.setType("Vertretung");
                substitution.setColor(colorProvider.getColor("Vertretung"));
            }
            day.addSubstitution(substitution);
        }
    }

    Elements plannedTeachers = svp.select(".LehrerVerplant");
    if (!plannedTeachers.isEmpty()) {
        day.addMessage("<b>Verplante Lehrer:</b> " + plannedTeachers.text());
    }

    Elements absences = svp.select(".Abwesenheiten");
    if (!absences.isEmpty()) {
        day.addMessage("<b>Abwesenheiten:</b> " + absences.text());
    }

    // Messages come either from the <p> siblings after the "Mitteilungen"
    // heading or, when there is no such heading, from .Mitteilungen elements.
    Element heading = svp.select("h2:contains(Mitteilungen)").first();
    if (heading != null) {
        Element sibling = heading.nextElementSibling();
        while (sibling != null && sibling.tagName().equals("p")) {
            String decoded = TextNode.createFromEncoded(sibling.html(), null).getWholeText();
            for (String nachricht : decoded.split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
            sibling = sibling.nextElementSibling();
        }
    } else {
        for (Element block : svp.select(".Mitteilungen")) {
            String decoded = TextNode.createFromEncoded(block.html(), null).getWholeText();
            for (String nachricht : decoded.split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
        }
    }

    v.addDay(day);
}