List of usage examples for org.jsoup.nodes Document html
public String html()
From source file:com.example.app.ui.DemoUserProfileViewer.java
@Override public void init() { // Make sure you call super.init() at the top of this method. /// See the Javadoc for #init() for more information about what it does. super.init(); // Set HTML element type and class names for presentation use on this Container component. withHTMLElement(HTMLElement.section); addClassName("user-profile-viewer"); // property_viewer is a standard class name. addClassName("property-viewer"); // Add microdata for programmatic / SEO use /// OR use RDFa support /// You typically only do this in viewers - not editors. setAttribute("itemscope", ""); setAttribute("itemtype", "http://schema.org/Person"); // setAttribute allows you to set any attribute as long as it will not interfere with a component's /// native HTML. For example, you cannot set the "value" attribute on a Field since /// it uses that attribute. // It's a good idea to *not* mark variables final that you don't want in the scope of event listeners. /// Hibernate/JPA entities are a great example of this pattern. You always need to re-attach /// entities before using them, so we should always call getUserProfile() in the context /// of handling an event. Note: our getUserProfile() method re-attaches the entity. DemoUserProfile demoUserProfile = getDemoUserProfile(); Name name = demoUserProfile.getName(); // You can use a Field for displaying non-internationalized content. /// It is desirable to do this since you don't need to create a LocalizedText. /// However, you cannot change the HTMLElement of a Field at this time, /// so some of the following code uses a Label which does allow /// specification of the HTMLElement. final Field namePrefix = new Field(name.getFormOfAddress(), false); final Field nameGiven = new Field(name.getFirst(), false); final Field nameFamily = new Field(name.getLast(), false); final Field nameSuffix = new Field(name.getSuffix(), false); // Sometimes it is easier and less error prone to make a component non-visible /// than checking for null on each use. 
Use this pattern with care. You don't /// want to consume a lot of resource unnecessarily. if (isEmptyString(namePrefix.getText())) namePrefix.setVisible(false);//from ww w . j a v a2 s. co m if (isEmptyString(nameSuffix.getText())) nameSuffix.setVisible(false); // Address Address address = demoUserProfile.getPostalAddress(); // Address lines are always on their own line so we make sure they are enclosed by a block element like a DIV.. final Label addressLine1 = new Label(); addressLine1.withHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address-line"); final Label addressLine2 = new Label(); addressLine2.withHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address-line"); if (address.getAddressLineList().size() > 0) addressLine1.setText(createText(address.getAddressLine(1))); if (address.getAddressLineList().size() > 1) addressLine2.setText(createText(address.getAddressLine(2))); final HTMLComponent city = new HTMLComponent(); // The "prop" class name is part of the standard HTML structure. It is always a good idea to also /// add a specific class name like "city" in this example. Please be consistent when using class names. /// For example, if everyone else is using "city", please use "city" too. Don't come up with another class name /// that means something similar like "town" or "locality". Consistency has a big impact on /// the time required to style HTML as well as the ability to reuse CSS. city.withHTMLElement(HTMLElement.span).addClassName("prop").addClassName("city"); if (!isEmptyString(address.getCity())) { // Our microdata for the city shouldn't include the comma, so this is a bit more complicated than the other examples. 
city.setText(createText("<span itemprop=\"addressLocality\">" + address.getCity() + "</span><span class=\"delimiter\">,</span>")); } else city.setVisible(false); final Label state = new Label(createText(address.getState())); state.addClassName("prop").addClassName("state"); final Label postalCode = new Label(createText(address.getPostalCode())); postalCode.addClassName("prop").addClassName("postal_code"); // Other Contact final Field phoneNumber = new Field(demoUserProfile.getPhoneNumber(), false); final Field emailAddress = new Field(demoUserProfile.getEmailAddress(), false); // Social Contact final URILink twitterLink = demoUserProfile.getTwitterLink() != null ? new URILink(_demoUserProfileDAO.toURI(demoUserProfile.getTwitterLink(), null)) : null; final URILink facebookLink = demoUserProfile.getFacebookLink() != null ? new URILink(_demoUserProfileDAO.toURI(demoUserProfile.getFacebookLink(), null)) : null; final URILink linkedInLink = demoUserProfile.getLinkedInLink() != null ? new URILink(_demoUserProfileDAO.toURI(demoUserProfile.getLinkedInLink(), null)) : null; // We are going to output HTML received from the outside, so we need to sanitize it first for security reasons. /// Sometimes you'll do this sanitation prior to persisting the data. It depends on whether or not you need to /// keep the original unsanitized HTML around. String processedHTML = demoUserProfile.getAboutMeProse(); if (!isEmptyString(processedHTML)) { // Process the HTML converting links as necessary (adding JSESSIONID(s) /// for URL based session tracking, converting resource links to increase concurrent loading limit, /// CMS link externalization, etc). /// This is *not* sanitation and should always be done before rendering - never before persisting. /// We are doing this before sanitizing the HTML to avoid having to whitelist internal URL protocols, etc. 
try { processedHTML = XMLRenderer.parseWithRoot(processedHTML, Event.getRequest(), Event.getResponse()); } catch (IOException e) { _logger.error("Unable to accept HTML: " + processedHTML, e); } // We don't trust the input, so we sanitize it with a whitelist of allowed HTML. Document dirty = Jsoup.parseBodyFragment(processedHTML, ""); Whitelist whitelist = Whitelist.relaxed(); // Don't allow users to use our website as a link farm whitelist.addEnforcedAttribute("a", "rel", "nofollow"); Cleaner cleaner = new Cleaner(whitelist); Document clean = cleaner.clean(dirty); processedHTML = clean.html(); } final HTMLComponent aboutMeProse = new HTMLComponent(processedHTML); Component aboutMeVideo = null; URL videoLink = demoUserProfile.getAboutMeVideoLink(); if (videoLink != null) { // There are several ways to link to media (Youtube video URL, Vimeo video URL, Flickr URL, // internally hosted media file, etc). /// You can link to it. /// You can embed it. See http://oembed.com/ for a common protocol for doing this. /// If the link is to the media itself, you can create a player for it. /// Below is an example of creating a link to the video as well as a player. final URI videoLinkURI = _demoUserProfileDAO.toURI(videoLink, null); URILink videoLinkComponent = new URILink(videoLinkURI, createText("My Video")); videoLinkComponent.setTarget("_blank"); aboutMeVideo = getAboutMe(videoLink, videoLinkURI, videoLinkComponent); if (aboutMeVideo == null) { // We could check for oEmbed support in case link was to youtube, vimeo, etc - http://oembed.com/ // Since this is an example, we'll just output the link. 
aboutMeVideo = videoLinkComponent; } } ImageComponent picture = null; final FileEntity userProfilePicture = demoUserProfile.getPicture(); if (userProfilePicture != null) { picture = new ImageComponent(new Image(userProfilePicture)); picture.setImageCaching(userProfilePicture.getLastModifiedTime() .before(new Date(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(60)))); } // Now that we've initialized most of the content, we'll add all the components to this View /// using the standard HTML structure for a property viewer. add(of(HTMLElement.section, "prop-group name", new Label(createText("Name")).withHTMLElement(HTMLElement.h1), namePrefix.setAttribute("itemprop", "honorificPrefix").addClassName("prop").addClassName("prefix"), nameGiven.setAttribute("itemprop", "givenName").addClassName("prop").addClassName("given"), nameFamily.setAttribute("itemprop", "familyName").addClassName("prop").addClassName("family"), nameSuffix.setAttribute("itemprop", "honorificSuffix").addClassName("prop") .addClassName("suffix"))); // Add wrapping DIV to group address lines if necessary. Component streetAddress = (!isEmptyString(addressLine1.getText()) && !isEmptyString(addressLine2.getText()) ? of(HTMLElement.div, "address-lines", addressLine1, addressLine2) : (isEmptyString(addressLine1.getText()) ? addressLine2 : addressLine1) .withHTMLElement(HTMLElement.div)); streetAddress.setAttribute("itemprop", "streetAddress"); boolean hasAddress = (!isEmptyString(addressLine1.getText()) || !isEmptyString(addressLine2.getText()) || !isEmptyString(city.getText()) || !isEmptyString(state.getText()) || !isEmptyString(postalCode.getText())); boolean hasPhone = !isEmptyString(phoneNumber.getText()); boolean hasEmail = !isEmptyString(emailAddress.getText()); // We only want to output the enclosing HTML if we have content to display. 
if (hasAddress || hasPhone || hasEmail) { Container contactContainer = of(HTMLElement.section, "contact", new Label(createText("Contact Information")).withHTMLElement(HTMLElement.h1)); add(contactContainer); if (hasAddress) { contactContainer.add(of(HTMLElement.div, "prop-group address", // We are using an H2 here because are immediate ancestor is a DIV. If it was a SECTION, /// then we would use an H1. See the UserProfileViewer for a comparison. new Label(createText("Address")).withHTMLElement(HTMLElement.h2), streetAddress, of(HTMLElement.div, "place", city, state.setAttribute("itemprop", "addressRegion"), postalCode.setAttribute("itemprop", "postalCode"))) .setAttribute("itemprop", "address").setAttribute("itemscope", "") .setAttribute("itemtype", "http://schema.org/PostalAddress")); } if (hasPhone) { contactContainer.add(of(HTMLElement.div, "prop phone", new Label(createText("Phone")).withHTMLElement(HTMLElement.h2), phoneNumber.setAttribute("itemprop", "telephone"))); } if (hasEmail) { contactContainer.add(of(HTMLElement.div, "prop email", new Label(createText("Email")).withHTMLElement(HTMLElement.h2), emailAddress.setAttribute("itemprop", "email"))); } } if (twitterLink != null || facebookLink != null || linkedInLink != null) { Container social = of(HTMLElement.section, "social", new Label(createText("Social Media Links")).withHTMLElement(HTMLElement.h1)); add(social); if (twitterLink != null) { twitterLink.setTarget("_blank"); twitterLink.setText(createText("Twitter Link")); social.add(of(HTMLElement.div, "prop twitter", createText("Twitter"), twitterLink)); } if (facebookLink != null) { facebookLink.setTarget("_blank"); facebookLink.setText(createText("Facebook Link")); social.add(of(HTMLElement.div, "prop facebook", createText("Facebook"), facebookLink)); } if (linkedInLink != null) { linkedInLink.setTarget("_blank"); linkedInLink.setText(createText("LinkedIn Link")); social.add(of(HTMLElement.div, "prop linkedin", createText("LinkedIn"), linkedInLink)); } 
} final boolean hasAboutMeProse = isEmptyString(aboutMeProse.getText()); if (!hasAboutMeProse || aboutMeVideo != null) { Container aboutMe = of(HTMLElement.section, "about-me", new Label(createText("About Me")).withHTMLElement(HTMLElement.h1)); add(aboutMe); if (picture != null) { aboutMe.add(of(HTMLElement.div, "prop picture", createText("Picture"), picture)); } if (hasAboutMeProse) { aboutMe.add(of(HTMLElement.div, "prop prose", createText("Professional Information, Hobbies, Interests..."), aboutMeProse)); } if (aboutMeVideo != null) { Label label = new Label(createText("Video")).withHTMLElement(HTMLElement.label); label.addClassName("vl"); aboutMe.add(of(HTMLElement.div, "prop video", label, aboutMeVideo)); } } }
From source file:com.example.app.ui.UserProfileViewer.java
@Override public void init() { // Make sure you call super.init() at the top of this method. /// See the Javadoc for #init() for more information about what it does. super.init(); // Set HTML element type and class names for presentation use on this Container component. setHTMLElement(HTMLElement.section); addClassName("user-profile-viewer"); // property_viewer is a standard class name. addClassName("property-viewer"); // Add microdata for programmatic / SEO use /// OR use RDFa support /// You typically only do this in viewers - not editors. setAttribute("itemscope", ""); setAttribute("itemtype", "http://schema.org/Person"); // setAttribute allows you to set any attribute as long as it will not interfere with a component's /// native HTML. For example, you cannot set the "value" attribute on a Field since /// it uses that attribute. // It's a good idea to *not* mark variables final that you don't want in the scope of event listeners. /// Hibernate/JPA entities are a great example of this pattern. You always need to re-attach /// entities before using them, so we should always call getUserProfile() in the context /// of handling an event. Note: our getUserProfile() method re-attaches the entity. UserProfile userProfile = getUserProfile(); Name name = userProfile.getName(); // You can use a Field for displaying non-internationalized content. /// It is desirable to do this since you don't need to create a LocalizedText. /// However, you cannot change the HTMLElement of a Field at this time, /// so some of the following code uses a Label which does allow /// specification of the HTMLElement. final Field namePrefix = new Field(name.getFormOfAddress(), false); final Field nameGiven = new Field(name.getFirst(), false); final Field nameFamily = new Field(name.getLast(), false); final Field nameSuffix = new Field(name.getSuffix(), false); // Sometimes it is easier and less error prone to make a component non-visible /// than checking for null on each use. 
Use this pattern with care. You don't /// want to consume a lot of resource unnecessarily. if (StringFactory.isEmptyString(namePrefix.getText())) namePrefix.setVisible(false);/*from w ww .j a v a2 s .c o m*/ if (StringFactory.isEmptyString(nameSuffix.getText())) nameSuffix.setVisible(false); // Address Address address = userProfile.getPostalAddress(); // Address lines are always on their own line so we make sure they are enclosed by a block element like a DIV.. final Label addressLine1 = new Label(); addressLine1.setHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address_line"); final Label addressLine2 = new Label(); addressLine2.setHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address_line"); if (address.getAddressLines().length > 0) addressLine1.setText(TextSources.create(address.getAddressLines()[0])); if (address.getAddressLines().length > 1) addressLine2.setText(TextSources.create(address.getAddressLines()[1])); final HTMLComponent city = new HTMLComponent(); // The "prop" class name is part of the standard HTML structure. It is always a good idea to also /// add a specific class name like "city" in this example. Please be consistent when using class names. /// For example, if everyone else is using "city", please use "city" too. Don't come up with another class name /// that means something similar like "town" or "locality". Consistency has a big impact on /// the time required to style HTML as well as the ability to reuse CSS. city.setHTMLElement(HTMLElement.span).addClassName("prop").addClassName("city"); if (!StringFactory.isEmptyString(address.getCity())) { // Our microdata for the city shouldn't include the comma, so this is a bit more complicated than the other examples. 
city.setText(TextSources.create("<span itemprop=\"addressLocality\">" + address.getCity() + "</span><span class=\"delimiter\">,</span>")); } else city.setVisible(false); final Label state = new Label(TextSources.create(address.getState())); state.addClassName("prop").addClassName("state"); final Label postalCode = new Label(TextSources.create(address.getPostalCode())); postalCode.addClassName("prop").addClassName("postal_code"); // Other Contact final Field phoneNumber = new Field(userProfile.getPhoneNumber(), false); final Field emailAddress = new Field(userProfile.getEmailAddress(), false); // Social Contact final URILink twitterLink = userProfile.getTwitterLink() != null ? new URILink(_userProfileDAO.toURI(userProfile.getTwitterLink(), null)) : null; final URILink facebookLink = userProfile.getFacebookLink() != null ? new URILink(_userProfileDAO.toURI(userProfile.getFacebookLink(), null)) : null; final URILink linkedInLink = userProfile.getLinkedInLink() != null ? new URILink(_userProfileDAO.toURI(userProfile.getLinkedInLink(), null)) : null; // We are going to output HTML received from the outside, so we need to sanitize it first for security reasons. /// Sometimes you'll do this sanitation prior to persisting the data. It depends on whether or not you need to /// keep the original unsanitized HTML around. String processedHTML = userProfile.getAboutMeProse(); if (!StringFactory.isEmptyString(processedHTML)) { // Process the HTML converting links as necessary (adding JSESSIONID(s) /// for URL based session tracking, converting resource links to increase concurrent loading limit, /// CMS link externalization, etc). /// This is *not* sanitation and should always be done before rendering - never before persisting. /// We are doing this before sanitizing the HTML to avoid having to whitelist internal URL protocols, etc. 
try { processedHTML = XMLRenderer.parseWithRoot(processedHTML, Event.getRequest(), Event.getResponse()); } catch (IOException e) { _logger.error("Unable to accept HTML: " + processedHTML, e); } // We don't trust the input, so we sanitize it with a whitelist of allowed HTML. Document dirty = Jsoup.parseBodyFragment(processedHTML, ""); Whitelist whitelist = Whitelist.relaxed(); // Don't allow users to use our website as a link farm whitelist.addEnforcedAttribute("a", "rel", "nofollow"); Cleaner cleaner = new Cleaner(whitelist); Document clean = cleaner.clean(dirty); processedHTML = clean.html(); } final HTMLComponent aboutMeProse = new HTMLComponent(processedHTML); Component aboutMeVideo = null; URL videoLink = userProfile.getAboutMeVideoLink(); if (videoLink != null) { // There are several ways to link to media (Youtube video URL, Vimeo video URL, Flickr URL, internally hosted media file, etc). /// You can link to it. /// You can embed it. See http://oembed.com/ for a common protocol for doing this. /// If the link is to the media itself, you can create a player for it. /// Below is an example of creating a link to the video as well as a player. final URI videoLinkURI = _userProfileDAO.toURI(videoLink, null); URILink videoLinkComponent = new URILink(videoLinkURI, TextSources.create("My Video")); videoLinkComponent.setTarget("_blank"); IMediaUtility util = MediaUtilityFactory.getUtility(); try { // Check if we can parse the media and it has a stream we like. /// In our made up example, we're only accepting H.264 video. We don't care about the audio in this example. 
IMediaMetaData mmd; if (util.isEnabled() && videoLinkURI != null && (mmd = util.getMetaData(videoLinkURI.toString())).getStreams().length > 0) { int width = 853, height = 480; // 480p default boolean hasVideo = false; for (IMediaStream stream : mmd.getStreams()) { if (stream.getCodec().getType() == ICodec.Type.video && "H264".equals(stream.getCodec().name())) { hasVideo = true; if (stream.getWidth() > 0) { width = stream.getWidth(); height = stream.getHeight(); } break; } } if (hasVideo) { Media component = new Media(); component.setMediaType(Media.MediaType.video); component.addSource(new MediaSource(videoLinkURI)); component.setFallbackContent(videoLinkComponent); component.setSize(new PixelMetric(width), new PixelMetric(height)); aboutMeVideo = component; } } } catch (IllegalArgumentException | RemoteException e) { _logger.error("Unable to get media information for " + videoLink, e); } if (aboutMeVideo == null) { // We could check for oEmbed support in case link was to youtube, vimeo, etc - http://oembed.com/ // Since this is an example, we'll just output the link. aboutMeVideo = videoLinkComponent; } } ImageComponent picture = null; final FileEntity userProfilePicture = userProfile.getPicture(); if (userProfilePicture != null) { picture = new ImageComponent(new Image(userProfilePicture)); picture.setImageCaching(userProfilePicture.getLastModifiedTime() .before(new Date(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(60)))); } // Now that we've initialized most of the content, we'll add all the components to this View /// using the standard HTML structure for a property viewer. 
add(of(HTMLElement.section, "prop-group name", new Label(TextSources.create("Name")).setHTMLElement(HTMLElement.h1), namePrefix.setAttribute("itemprop", "honorificPrefix").addClassName("prop").addClassName("prefix"), nameGiven.setAttribute("itemprop", "givenName").addClassName("prop").addClassName("given"), nameFamily.setAttribute("itemprop", "familyName").addClassName("prop").addClassName("family"), nameSuffix.setAttribute("itemprop", "honorificSuffix").addClassName("prop") .addClassName("suffix"))); // Add wrapping DIV to group address lines if necessary. Component streetAddress = (!StringFactory.isEmptyString(addressLine1.getText()) && !StringFactory.isEmptyString(addressLine2.getText()) ? of(HTMLElement.div, "address_lines", addressLine1, addressLine2) : (StringFactory.isEmptyString(addressLine1.getText()) ? addressLine2 : addressLine1) .setHTMLElement(HTMLElement.div)); streetAddress.setAttribute("itemprop", "streetAddress"); boolean hasAddress = (!StringFactory.isEmptyString(addressLine1.getText()) || !StringFactory.isEmptyString(addressLine2.getText()) || !StringFactory.isEmptyString(city.getText()) || !StringFactory.isEmptyString(state.getText()) || !StringFactory.isEmptyString(postalCode.getText())); boolean hasPhone = !StringFactory.isEmptyString(phoneNumber.getText()); boolean hasEmail = !StringFactory.isEmptyString(emailAddress.getText()); // We only want to output the enclosing HTML if we have content to display. if (hasAddress || hasPhone || hasEmail) { Container contactContainer = of(HTMLElement.section, "contact", new Label(TextSources.create("Contact Information")).setHTMLElement(HTMLElement.h1)); add(contactContainer); if (hasAddress) { contactContainer.add(of(HTMLElement.div, "prop-group address", // We are using an H2 here because are immediate ancestor is a DIV. If it was a SECTION, /// then we would use an H1. See the UserProfileViewer for a comparison. 
new Label(TextSources.create("Address")).setHTMLElement(HTMLElement.h2), streetAddress, of(HTMLElement.div, "place", city, state.setAttribute("itemprop", "addressRegion"), postalCode.setAttribute("itemprop", "postalCode"))) .setAttribute("itemprop", "address").setAttribute("itemscope", "") .setAttribute("itemtype", "http://schema.org/PostalAddress")); } if (hasPhone) { contactContainer.add(of(HTMLElement.div, "prop phone", new Label(TextSources.create("Phone")).setHTMLElement(HTMLElement.h2), phoneNumber.setAttribute("itemprop", "telephone"))); } if (hasEmail) { contactContainer.add(of(HTMLElement.div, "prop email", new Label(TextSources.create("Email")).setHTMLElement(HTMLElement.h2), emailAddress.setAttribute("itemprop", "email"))); } } if (twitterLink != null || facebookLink != null || linkedInLink != null) { Container social = of(HTMLElement.section, "social", new Label(TextSources.create("Social Media Links")).setHTMLElement(HTMLElement.h1)); add(social); if (twitterLink != null) { twitterLink.setTarget("_blank"); twitterLink.setText(TextSources.create("Twitter Link")); social.add(of(HTMLElement.div, "prop twitter", TextSources.create("Twitter"), twitterLink)); } if (facebookLink != null) { facebookLink.setTarget("_blank"); facebookLink.setText(TextSources.create("Facebook Link")); social.add(of(HTMLElement.div, "prop facebook", TextSources.create("Facebook"), facebookLink)); } if (linkedInLink != null) { linkedInLink.setTarget("_blank"); linkedInLink.setText(TextSources.create("LinkedIn Link")); social.add(of(HTMLElement.div, "prop linkedin", TextSources.create("LinkedIn"), linkedInLink)); } } final boolean hasAboutMeProse = StringFactory.isEmptyString(aboutMeProse.getText()); if (!hasAboutMeProse || aboutMeVideo != null) { Container aboutMe = of(HTMLElement.section, "about_me", new Label(TextSources.create("About Me")).setHTMLElement(HTMLElement.h1)); add(aboutMe); if (picture != null) { aboutMe.add(of(HTMLElement.div, "prop picture", 
TextSources.create("Picture"), picture)); } if (hasAboutMeProse) { aboutMe.add(of(HTMLElement.div, "prop prose", TextSources.create("Professional Information, Hobbies, Interests..."), aboutMeProse)); } if (aboutMeVideo != null) { Label label = new Label(TextSources.create("Video")).setHTMLElement(HTMLElement.label); label.addClassName("vl"); aboutMe.add(of(HTMLElement.div, "prop video", label, aboutMeVideo)); } } }
From source file:io.gravitee.management.service.impl.EmailServiceImpl.java
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception { final Document document = Jsoup.parse(htmlText); final List<String> resources = new ArrayList<>(); final Elements imageElements = document.getElementsByTag("img"); resources.addAll(//from ww w.j av a2 s . c o m imageElements.stream().filter(imageElement -> imageElement.hasAttr("src")).map(imageElement -> { final String src = imageElement.attr("src"); imageElement.attr("src", "cid:" + src); return src; }).collect(Collectors.toList())); final String html = document.html(); mailMessage.setText(html, true); for (final String res : resources) { final FileSystemResource templateResource = new FileSystemResource(new File(templatesPath, res)); mailMessage.addInline(res, templateResource, MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(res)); } return html; }
From source file:me.vertretungsplan.parser.UntisMonitorParser.java
public SubstitutionSchedule getSubstitutionSchedule() throws IOException, JSONException, CredentialInvalidException { loginResponse = new LoginHandler(scheduleData, credential, cookieProvider).handleLoginWithResponse(executor, cookieStore);/*w w w . ja va 2s.c o m*/ SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData); JSONArray urls = scheduleData.getData().getJSONArray(PARAM_URLS); String encoding = scheduleData.getData().optString(PARAM_ENCODING, null); List<Document> docs = new ArrayList<>(); for (int i = 0; i < urls.length(); i++) { JSONObject url = urls.getJSONObject(i); final String urlStr = url.getString(SUBPARAM_URL); for (String dateUrl : ParserUtils.handleUrlWithDateFormat(urlStr)) { loadUrl(dateUrl, encoding, url.getBoolean(SUBPARAM_FOLLOWING), docs); } } for (Document doc : docs) { if (scheduleData.getData().has(PARAM_EMBEDDED_CONTENT_SELECTOR)) { for (Element part : doc.select(scheduleData.getData().getString(PARAM_EMBEDDED_CONTENT_SELECTOR))) { SubstitutionScheduleDay day = parseMonitorDay(part, scheduleData.getData()); v.addDay(day); } } else if (doc.title().contains("Untis") || doc.html().contains("<!--<title>Untis")) { SubstitutionScheduleDay day = parseMonitorDay(doc, scheduleData.getData()); v.addDay(day); } // else Error if (scheduleData.getData().has(PARAM_LAST_CHANGE_SELECTOR) && doc.select(scheduleData.getData().getString(PARAM_LAST_CHANGE_SELECTOR)).size() > 0) { String text = doc.select(scheduleData.getData().getString(PARAM_LAST_CHANGE_SELECTOR)).first() .text(); String lastChange; Pattern pattern = Pattern.compile("\\d\\d\\.\\d\\d\\.\\d\\d\\d\\d,? 
\\d\\d:\\d\\d"); Matcher matcher = pattern.matcher(text); if (matcher.find()) { lastChange = matcher.group(); } else { lastChange = text; } v.setLastChangeString(lastChange); v.setLastChange(ParserUtils.parseDateTime(lastChange)); } } if (scheduleData.getData().has(PARAM_WEBSITE)) { v.setWebsite(scheduleData.getData().getString(PARAM_WEBSITE)); } else if (urls.length() == 1) { v.setWebsite(urls.getJSONObject(0).getString("url")); } v.setClasses(getAllClasses()); v.setTeachers(getAllTeachers()); return v; }
From source file:org.confab.PhpBB3Parser.java
public List<Forum> parseForums(Document root, BulletinBoard parent) { Utilities.debug("parseForums"); List<Forum> ret = new ArrayList<Forum>(); // get table/*from w ww . j av a 2 s.co m*/ Elements forum_tables = root.select("ul[class=topiclist forums]"); assert !forum_tables.isEmpty() : root.html(); for (Element forum_table : forum_tables) { Elements els_li = forum_table.select("li.row"); assert !els_li.isEmpty(); for (Element el_li : els_li) { Forum new_forum = new Forum(parent); // Get the forum url Elements els_a = el_li.select("a.forumtitle"); Element el_a = els_a.first(); assert el_a != null; new_forum.url = el_a.attr("href"); assert new_forum.url != null; Utilities.debug("new_forum.url : " + new_forum.url); // Get the title text new_forum.title = el_a.text(); assert new_forum.title != null; Utilities.debug("new_forum.title : " + new_forum.title); // Check for any subforums in remaining a elements els_a.remove(els_a.first()); for (Element _el_a : els_a) { Forum sub_forum = new Forum(parent); sub_forum.url = el_a.attr("href"); assert sub_forum.url != null; sub_forum.title = el_a.text(); assert sub_forum.title != null; new_forum.subForums.add(sub_forum); Utilities.debug("added subForum: " + sub_forum.title); } // Get the description/message of this topic String el_description = el_a.parent().text(); if (el_description != null) { new_forum.description = el_description; } else { new_forum.description = ""; } Utilities.debug("new_forum.description : " + new_forum.description); Utilities.debug("new_forum.parent.url : " + new_forum.parent.url); ret.add(new_forum); Utilities.debug("-----"); } } Utilities.debug("end parseForums"); return ret; }
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
private static List<ConfluencePage> handlePagination() { final List<ConfluencePage> confluencePages = new ArrayList<>(); final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get(); final PaginationMode paginationMode = swaggerConfluenceConfig.getPaginationMode(); final Document originalDocument = SWAGGER_DOCUMENT.get(); final Document transformedDocument = originalDocument.clone(); final Elements categoryElements = transformedDocument.select(".sect1"); // Remove ToC form the transformed document final Elements toc = transformedDocument.select(".toc"); toc.html(""); toc.unwrap();//from ww w. j ava 2s .com // For Single Page Mode, the incoming XHTML can be used directly. if (paginationMode == SINGLE_PAGE) { final ConfluencePage confluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle()) .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null)) .build(); if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) { confluencePage.setXhtml(originalDocument.html()); } else { confluencePage.setXhtml(transformedDocument.html()); } confluencePages.add(confluencePage); return confluencePages; } // Before beginning further processing, we need to know if we're in individual // page mode or not, as that will effect how we split the DOM. If we're in this // mode then the category pages will contain inner table of contents. final boolean individualPages = (paginationMode == INDIVIDUAL_PAGES); // From here on, if we're still proceeding then we know the meat of the document // will go in sub-pages. 
So for the master page, we will use the table of contents final Elements tocElements = originalDocument.select(".toc"); final List<String> innerTocXHtmlList = new ArrayList<>(); final Elements innerTocElements = originalDocument.select(".sectlevel2"); for (final Element innerTocElement : innerTocElements) { // If we're in individual page mode, then we collect the inner ToCs if (individualPages) { final StringBuilder tocHtml = new StringBuilder(); tocHtml.append("<div id=\"toc\" class=\"toc\">"); tocHtml.append("<h4 id=\"toctitle\">Table of Contents</h4>"); tocHtml.append("<div><ul class=\"sectlevel1\">"); tocHtml.append(innerTocElement.html()); tocHtml.append("</ul></div></div>"); innerTocXHtmlList.add(tocHtml.toString()); } // If we're in category page mode, then we strip out the inner table of contents. else { innerTocElement.html(""); innerTocElement.unwrap(); } } // Build the Root Page w/ the Appropriate Level of Table of Contents final ConfluencePage rootConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle()) .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null)) .withXhtml(tocElements.html()).build(); confluencePages.add(rootConfluencePage); int category = 1; // Now we process the category pages for (final Element categoryElement : categoryElements) { // Fetch the title from the first child, which is the header element final String categoryTitle = categoryElement.children().first().text(); // If we're in individual mode then we need these to be sub table of contents if (individualPages) { final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle) .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null)) .withXhtml(innerTocXHtmlList.get(category - 1)).build(); confluencePages.add(categoryConfluencePage); final Elements 
individualElements = categoryElement.getElementsByClass("sect2"); int individual = 1; for (final Element individualElement : individualElements) { final String individualTitle = individualElement.children().first().text(); final ConfluencePage individualConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(INDIVIDUAL).withOriginalTitle(individualTitle) .withConfluenceTitle(buildConfluenceTitle(individualTitle, category, individual)) .withXhtml(individualElement.html()).build(); confluencePages.add(individualConfluencePage); individual++; } category++; continue; } // If we're in category mode, we use the remaining page data final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle) .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null)) .withXhtml(categoryElement.html()).build(); confluencePages.add(categoryConfluencePage); category++; } return confluencePages; }
From source file:info.smartkit.hairy_batman.query.SogouSearchQuery.java
public void parseWxOpenId() { Document doc; try {//w ww. j av a 2s . c om // need http protocol // doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE+ wxFoo.getSubscribeId()).get(); doc = Jsoup.connect("http://weixin.sogou.com/weixin?type=1&query=" + wxFoo.getSubscribeId() + "&fr=sgsearch&ie=utf8&_ast=1423915648&_asf=null&w=01019900&cid=null&sut=19381").get(); LOG.debug("openID html INFO:" + doc.html()); // get page title String title = doc.title(); LOG.debug("title : " + title); // get all "?:" value of html <span> //Elements openIdLink = doc.select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELEMENTS).select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELE_IDENTITY); Elements openIdLink = doc.getElementsByClass("wx-rb"); Element a = null; String openIdLinkHref = ""; if (openIdLink != null && openIdLink.size() > 0) { Iterator<Element> itea = openIdLink.iterator(); while (itea.hasNext()) { a = itea.next(); LOG.debug("openID html INFO:" + a.html()); if (a.getElementsByTag("em").html().indexOf(wxFoo.getSubscribeId()) != -1) { break; } } } if (a != null) { openIdLinkHref = a.attr("href"); } LOG.debug("openIdLinkHref:" + openIdLinkHref); // FIXME:???? if (this.wxFoo.getOpenId() == null && openIdLinkHref.length() > 0) { this.wxFoo.setOpenId(openIdLinkHref.split(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_KEYWORDS)[1]); LOG.info("saved wxOpenId value: " + this.wxFoo.getOpenId()); GlobalVariables.wxFooListWithOpenId.add(this.wxFoo); // File reporting new FileReporter(GlobalConsts.REPORT_FILE_OUTPUT_OPENID, GlobalVariables.wxFooListWithOpenId, FileReporter.REPORTER_TYPE.R_T_OPENID, FileReporter.REPORTER_FILE_TYPE.EXCEL).write(); // Then,OpenID JSON site parse if (this.wxFoo.getOpenId() != null) { // Save openId to DB. 
try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), this.wxFoo.getOpenId() }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } else { LOG.warn("SogouSearchQuery getOpenId Failure! site info:" + wxFoo.getCode()); // TODO write those info to File or DB for collect which // agency not open weixin service // Save openId to DB. try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), "" }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); LOG.warn("Can not get subsriber info: " + this.wxFoo.getCode()); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } } } catch (IOException e) { // e.printStackTrace(); LOG.error(e.toString()); } }
From source file:com.maxl.java.aips2sqlite.PseudoExpertInfo.java
/**
 * Extracts all the important information from the pseudo "Fachinfo" file.
 *
 * <p>The docx is read twice. The first pass collects section titles: the text of every
 * paragraph containing a bold run, starting at "Zusammensetzung" / "Composition", followed by
 * a trailing "nil" sentinel. The second pass reads the title (first paragraph), the author
 * (the paragraph preceding the "Medizinprodukt" tag), and then the section bodies, building
 * an HTML fragment, the section id/title strings, the EAN code list and the package list as
 * it goes. The assembled HTML is normalized through jsoup, stored in {@code mMedi}, and
 * {@code addToDB()} is called.
 *
 * @param idx              number printed in the progress line written to standard output
 * @param pseudo_info_file stream of the docx file to read
 * @return {@code true} after the record has been handed to {@code addToDB()}; {@code false}
 *         if the first collected section title does not match the title expected for
 *         {@code mLanguage} ("de"/"fr"), or if reading the file throws an {@link IOException}
 */
public boolean extractInfo(int idx, FileInputStream pseudo_info_file) {
    // Reset all per-document state held in fields.
    mMedi = new MedicalInformations.MedicalInformation();

    mSectionContent = new ArrayList<String>();
    mSectionTitles = new ArrayList<String>();
    mBarCodes = new ArrayList<String>();
    m_list_of_packages = new ArrayList<String>();

    String mediTitle = "";
    String mediAuthor = "";
    String mediPseudoTag = "";
    String mediHtmlContent = "";

    StringBuilder content = new StringBuilder();

    try {
        // Read in docx file
        // NOTE(review): docx is never closed in this method - confirm whether the POI
        // version in use requires/permits closing it.
        XWPFDocument docx = new XWPFDocument(pseudo_info_file);
        // Get iterator through all paragraphs
        Iterator<XWPFParagraph> para = docx.getParagraphsIterator();

        // Pre-process input stream to extract paragraph titles
        boolean goodToGo = false;
        while (para.hasNext()) {
            List<XWPFRun> runs = para.next().getRuns();
            if (!runs.isEmpty()) {
                for (XWPFRun r : runs) {
                    // A bold run identifies a section title (the italic check is disabled).
                    // The paragraph text is added once per bold run in the paragraph.
                    if (r.isBold()) { // && r.isItalic()) {
                        String pText = r.getParagraph().getText();
                        // These are the first chapter titles (DE and FR)
                        if (pText.equals("Zusammensetzung") || pText.equals("Composition"))
                            goodToGo = true;
                        // Titles are only collected from the first chapter title onwards.
                        if (goodToGo == true)
                            mSectionTitles.add(pText);
                    }
                }
            }
        }
        // Add "nil" at the end; it also guarantees get(0) below never fails.
        mSectionTitles.add("nil");

        // Reject documents whose first section title does not match the configured language.
        if (mLanguage.equals("de") && !mSectionTitles.get(0).equals("Zusammensetzung"))
            return false;
        if (mLanguage.equals("fr") && !mSectionTitles.get(0).equals("Composition"))
            return false;

        // Reset iterator
        para = docx.getParagraphsIterator();

        // Init list for section content: one empty string per section title
        for (int i = 0; i < mSectionTitles.size(); ++i)
            mSectionContent.add(i, "");

        // Get title: the first paragraph of the document
        if (para.hasNext())
            mediTitle = para.next().getParagraphText();

        // Get author while using "Medizinprodukt" as tag: the author is the paragraph
        // immediately preceding the tag paragraph.
        String prevParaText = "";
        while (para.hasNext()) {
            String paraText = para.next().getParagraphText();
            // If this word is not found, then no pseudo FI will be produced
            // NOTE(review): "Dispositif mdical" looks like "Dispositif médical" with the
            // accented character lost - confirm against the original source encoding.
            if (paraText.equals("Medizinprodukt") || paraText.equals("Dispositif mdical")) {
                mediPseudoTag = paraText;
                mediAuthor = prevParaText;
                break;
            }
            prevParaText = paraText;
        }

        // Get section titles + sections + ean codes
        boolean isSectionPackungen = false;
        int numSection = 0;

        // Init with section1 and title
        String sectionId_str = "";
        String sectionTitle_str = "";
        mEanCodes_str = "";
        mSectionIds_str = "section1,";
        mSectionTitles_str = mediTitle + ",";
        m_pack_info_str = "";
        // This is the EAN code pattern: 13 digits anchored at the start of the paragraph
        // text, so at most one code is found per paragraph.
        Pattern pattern = Pattern.compile("^[0-9]{13}");
        // Loop through the remaining paragraphs: a paragraph equal to the next expected
        // section title opens a new section, anything else is content of the current one.
        while (para.hasNext()) {
            String paraText = para.next().getParagraphText();
            if (paraText.equals(mSectionTitles.get(numSection))) {
                // ->> Get section title
                isSectionPackungen = false;
                // Advance to the next expected section title
                if (numSection < mSectionTitles.size())
                    numSection++;
                // Section "Packungen" is special
                // NOTE(review): "Prsentation" looks like "Présentation" with the accented
                // character lost - confirm against the original source encoding.
                if (paraText.equals("Packungen") || paraText.equals("Prsentation")) {
                    isSectionPackungen = true;
                }
                // Close previous div
                if (numSection > 1)
                    content.append("</div>");
                // Create html
                sectionId_str = "section" + (numSection + 1); // section1 is reserved for the MonTitle
                sectionTitle_str = mSectionTitles.get(numSection - 1);
                content.append("<div class=\"paragraph\" id=\"" + sectionId_str + "\">");
                content.append("<div class=\"absTitle\">" + sectionTitle_str + "</div>");
                // Generate section id string
                mSectionIds_str += (sectionId_str + ",");
                // Generate titles string
                mSectionTitles_str += (sectionTitle_str + ";");
            } else {
                // ->> Get section content
                // NOTE(review): if a content paragraph precedes the first section title,
                // numSection is still 0 here and the index is -1 - confirm this cannot happen.
                String s = mSectionContent.get(numSection - 1);
                mSectionContent.set(numSection - 1, s + paraText + " ");
                // Create html
                content.append("<p class=\"spacing1\">" + paraText + "</p>");
                // Extract EAN codes and start positions
                Matcher matcher = pattern.matcher(paraText);
                while (matcher.find()) {
                    String eanCode = matcher.group();
                    mEanCodes_str += (eanCode + ", ");
                    if (!eanCode.isEmpty()) {
                        // Defaults used when the EAN code has no entry in m_map_products
                        String pup = "";
                        String efp = "";
                        String fep = "";
                        String fap = "";
                        String vat = "";
                        String size = "";
                        String units = "";
                        String swissmedic_cat = "";
                        String pharma_code = "";
                        int visible = 0xff;
                        int has_free_samples = 0x00; // by default no free samples
                        // Extract fep and fap pricing information
                        // FAP = Fabrikabgabepreis = EFP?
                        // FEP = Fachhandelseinkaufspreis
                        // EFP = FAP < FEP < PUP
                        if (m_map_products != null && eanCode != null && m_map_products.containsKey(eanCode)) {
                            Product product = m_map_products.get(eanCode);
                            // Prices are only formatted when positive; otherwise they stay empty
                            if (product.efp > 0.0f)
                                efp = String.format("CHF %.2f", product.efp);
                            if (product.pp > 0.0f)
                                pup = String.format("CHF %.2f", product.pp);
                            if (product.fap > 0.0f)
                                fap = String.format("CHF %.2f", product.fap);
                            if (product.fep > 0.0f)
                                fep = String.format("CHF %.2f", product.fep);
                            if (product.vat > 0.0f)
                                vat = String.format("%.2f", product.vat);
                            if (product.size != null && !product.size.isEmpty())
                                size = product.size;
                            // Only the first unit entry is used
                            if (product.units != null && product.units.length > 0)
                                units = product.units[0];
                            if (product.swissmedic_cat != null && !product.swissmedic_cat.isEmpty())
                                swissmedic_cat = product.swissmedic_cat;
                            if (product.pharmacode != null && !product.pharmacode.isEmpty())
                                pharma_code = product.pharmacode;
                            visible = product.visible;
                            has_free_samples = product.free_sample;
                        }
                        // One '|'-separated, newline-terminated record per package
                        m_list_of_packages.add(mediTitle.toUpperCase() + ", " + units + ", " + size + "|"
                                + size + "|" + units + "|" + efp + "|" + pup + "|" + fap + "|" + fep + "|"
                                + vat + "|" + swissmedic_cat + ",,|" + eanCode + "|" + pharma_code + "|"
                                + visible + "|" + has_free_samples + "\n");
                        // Generate bar codes; the encoded barcode is appended to the section
                        // HTML directly after the paragraph that contained the EAN code
                        BarCode bc = new BarCode();
                        String barcodeImg64 = bc.encode(eanCode);
                        mBarCodes.add("<p class=\"spacing1\">" + barcodeImg64 + "</p>");
                        content.append(barcodeImg64);
                    }
                }
                // Generate section Packungen for search result
                if (isSectionPackungen)
                    m_pack_info_str += (paraText + "\n");
            }
        }
        /*
        // Add chapter "Barcodes"
        content.append("<p class=\"paragraph\"></p><div class=\"absTitle\">" + "Barcodes" + "</div>");
        for (String bcode : mBarCodes)
            content.append(bcode);
        */
        // Remove the trailing ", " (two characters) from mEanCodes_str
        if (!mEanCodes_str.isEmpty())
            mEanCodes_str = mEanCodes_str.substring(0, mEanCodes_str.length() - 2);
        // Remove last \n from m_pack_info_str
        if (!m_pack_info_str.isEmpty())
            m_pack_info_str = m_pack_info_str.substring(0, m_pack_info_str.length() - 1);

        // Set title, author; the ATC code is the fixed marker "PSEUDO" and the title doubles
        // as the substances entry
        mMedi.setTitle(mediTitle);
        mMedi.setAuthHolder(mediAuthor);
        mMedi.setAtcCode("PSEUDO");
        mMedi.setSubstances(mediTitle);

        System.out.println(idx + " - " + mediTitle + ": " + mEanCodes_str);

        // Close previous div + monographie div
        content.append("</div></div>");
        String title = "<div class=\"MonTitle\" id=\"section1\">" + mediTitle + "</div>";
        String author = "<div class=\"ownerCompany\"><div style=\"text-align: right;\">" + mediAuthor
                + "</div></div>";
        // Set "Medizinprodukt" label
        String pseudo = "<p class=\"spacing1\">" + mediPseudoTag + "</p>";
        // Set medi content
        mediHtmlContent = "<html><head></head><body><div id=\"monographie\">" + title + author + pseudo
                + content.toString() + "</div></body></html>";

        // Generate clean html file: re-serialize through jsoup with xhtml escaping, UTF-8,
        // pretty-printing and an indent of 1
        Document doc = Jsoup.parse(mediHtmlContent);
        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        doc.outputSettings().charset("UTF-8");
        doc.outputSettings().prettyPrint(true);
        doc.outputSettings().indentAmount(1);
        mediHtmlContent = doc.html();

        // Set html content
        mMedi.setContent(mediHtmlContent);

        // Add to DB
        addToDB();

        return true;
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
From source file:gui.InboxPanel.java
private void setTextBody(String sbody) { String html = BodyTextPane.getText(); Document doc = Jsoup.parseBodyFragment(html); //Element body = doc.body(); //body.text(sbody); doc.select("body").html(sbody); BodyTextPane.setText(doc.html()); }
From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java
/** * Fetch the web page from the URL, parse the HTML to populate the metadata * required by WebPageSnapshotModel, and return the constructed * WebPageSnapshotModel./* www .j a va 2 s . c o m*/ * * @param url The URL to fetch the web page from * @return The WebPageSnapshotModel * @throws IOException Thrown if there's an issue fetching the web page. */ private WebPageSnapshotModel fetchWebPage(String url, String contentKey) throws IOException { long fetchTime = System.currentTimeMillis(); Connection connection = Jsoup.connect(url); Response response = connection.execute(); long postFetchTime = System.currentTimeMillis(); int timeToFetch = (int) (postFetchTime - fetchTime); Document doc = response.parse(); String destinationUrl = response.url().toString(); String title = doc.title(); String description = getDescriptionFromDocument(doc); List<String> keywords = getKeywordsFromDocument(doc); List<String> outlinks = getOutlinksFromDocument(doc); return WebPageSnapshotModel.newBuilder().setUrl(destinationUrl) .setFetchedAtRevTs(Long.MAX_VALUE - fetchTime).setSize(doc.html().length()).setFetchedAt(fetchTime) .setFetchTimeMs(timeToFetch).setTitle(title).setDescription(description).setKeywords(keywords) .setOutlinks(outlinks).setContentKey(contentKey).setContent(ImmutableMap.of(contentKey, doc.html())) .build(); }