List of usage examples for org.jsoup.nodes Document getElementsByTag
public Elements getElementsByTag(String tagName)
From source file:com.kylemsguy.fishyfishes.MainActivity.java
/**
 * Parses the given markup with Jsoup and concatenates the string form of its
 * {@code <p>} elements, each followed by a newline.
 *
 * @param xmlText markup to parse
 * @return the concatenated paragraphs (possibly empty), or {@code null} if any
 *         exception is thrown while parsing or collecting them
 */
public String extractParagraphs(String xmlText) {
    StringBuilder collected = new StringBuilder();
    try {
        org.jsoup.nodes.Document parsed = Jsoup.parse(xmlText);
        Elements paragraphs = parsed.getElementsByTag("p");
        // NOTE(review): iteration starts at index 1, so the first <p> is never
        // emitted -- confirm this is intentional.
        for (int idx = 1; idx < paragraphs.size(); idx++) {
            collected.append(paragraphs.get(idx).toString()).append('\n');
        }
        return collected.toString();
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
From source file:github.popeen.dsub.fragments.SelectPodcastsFragment.java
private String getRssFeed(String url) { String url2 = url;//from www . j a v a 2 s . co m try { if (url.toLowerCase().contains("itunes.apple.com")) { Pattern pattern = Pattern.compile("/id([0-9]*)"); Matcher matcher = pattern.matcher(url); if (matcher.find()) { try { String raw = KakaduaUtil.http_get_contents( "https://itunes.apple.com/lookup?id=" + matcher.group(1) + "&entity=podcast"); url2 = new JSONObject(raw).getJSONArray("results").getJSONObject(0).getString("feedUrl"); Log.w("podcast", url2); } catch (Exception e) { Log.w("podcast", e.toString()); } } } else if (url.toLowerCase().contains("soundcloud.com")) { try { Document doc = Jsoup.connect(url).get(); Elements metas = doc.getElementsByTag("meta"); for (Element meta : metas) { if (meta.attr("property").equals("al:android:url")) { String id = meta.attr("content").replace("soundcloud://users:", ""); url2 = "https://feeds.soundcloud.com/users/soundcloud:users:" + id + "/sounds.rss"; Log.w("podcast", url2); } } } catch (Exception e) { Log.w("podcast", e.toString()); } } else if (url.toLowerCase().contains("player.fm/series")) { try { Document doc = Jsoup.connect(url).get(); Elements links = doc.select(".blatant"); for (Element link : links) { if (link.text().equals("Public Feed")) { url2 = link.attr("href"); Log.w("podcast", url2); } } } catch (Exception e) { Log.w("podcast", e.toString()); } } else if (url.toLowerCase().contains("acast.com") || url.toLowerCase().contains("podbean.com")) { /* TODO, This uses a standard tag that most podcasting websites support. Make it always try this if the url entered is not a valid feed or a website with a specific conversion above like for example iTunes. 
If it fails it needs to prevent the app from adding it to the server and then show an error message */ try { Document doc = Jsoup.connect(url).get(); Elements links = doc.select("link"); for (Element link : links) { if (link.attr("type").equals("application/rss+xml")) { url2 = link.attr("href"); Log.w("podcast", url2); } } } catch (Exception e) { Log.w("podcast", e.toString()); } } } catch (Exception e) { Log.w("podcast", e.toString()); } return url2; }
From source file:nl.sidn.pcap.ip.GoogleResolverCheck.java
@Override protected void init() { String url = Settings.getInstance().getSetting(Settings.RESOLVER_LIST_GOOGLE); LOGGER.info("Load Google resolver addresses from url: " + url); Document doc = null; try {// ww w. ja va 2 s . co m doc = Jsoup.connect(url).get(); } catch (Exception e) { LOGGER.error("Problem while getting Google resolvers url: " + url); return; } Elements tags = doc.getElementsByTag("pre"); if (tags.size() == 2) { Element resolvers = tags.get(0); //Element resolver = codes.get(0); String[] ips = StringUtils.split(resolvers.text(), '\n'); for (String ip : ips) { String[] parts = StringUtils.split(ip, ' '); if (parts.length == 2) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Add Google resolver IP range: " + parts[0]); } try { bit_subnets.add(Subnet.createInstance(parts[0])); subnets.add(parts[0]); } catch (UnknownHostException e) { LOGGER.error("Problem while adding Google resolver IP range: " + parts[0] + e); } } } if (subnets.size() == 0) { LOGGER.error("No Google resolvers found at url: " + url); } } else { LOGGER.error("No Google resolvers found at url: " + url); } }
From source file:no.kantega.publishing.admin.content.htmlfilter.CleanupFormHtmlFilter.java
@Override public Document runFilter(Document document) { Elements inputs = document.getElementsByTag("input"); for (Element input : inputs) { String type = input.attr("type"); if (isBlank(type)) { input.attr("type", "text"); }//from w ww. j a va 2 s .c om } return document; }
From source file:no.kantega.publishing.admin.content.htmlfilter.ConvertUnderlineToEditorStyleFilter.java
@Override public Document runFilter(Document document) { for (Element span : document.getElementsByTag("span")) { String style = span.attr("style"); if (isNotBlank(style)) { String textDecoration = getSubAttributeValue(style, "text-decoration"); if ("underline".equalsIgnoreCase(textDecoration)) { span.removeAttr("style"); span.tagName("u"); }// w ww . ja va 2 s. co m } } return document; }
From source file:no.kantega.publishing.admin.content.htmlfilter.ImgHeightAndWidthFilter.java
@Override public Document runFilter(Document document) { if (multimediaDao == null) { ApplicationContext context = RootContext.getInstance(); multimediaDao = context.getBean(MultimediaDao.class); imageEditor = context.getBean(ImageEditor.class); }//from ww w. j a v a 2 s . com for (Element img : document.getElementsByTag("img")) { String width = img.attr("width"); String height = img.attr("height"); if (isNotBlank(width) && isNoneBlank(height)) { try { int imageWidth = Integer.parseInt(width); int imageHeight = Integer.parseInt(height); String url = img.attr("src"); if (url != null) { List<Integer> ids = MultimediaHelper.getMultimediaIdsFromText(url); if (ids.size() == 1) { int multimediaId = ids.get(0); Multimedia image = multimediaDao.getMultimedia(multimediaId); if (imageWidth != image.getWidth() || imageHeight != image.getHeight()) { MultimediaDimensions d = imageEditor.getResizedImageDimensions(image.getWidth(), image.getHeight(), imageWidth, imageHeight); img.attr("height", String.valueOf(d.getHeight())); img.attr("width", String.valueOf(d.getWidth())); String imageUrl = image.getUrl(); img.attr("src", imageUrl + (imageUrl.contains("?") ? "&" : "?") + "width=" + d.getWidth()); } } } } catch (NumberFormatException e) { log.error("Could not parse number", e); } } } return document; }
From source file:no.kantega.publishing.admin.content.htmlfilter.ReplaceStyleAlignWithAttributeAlignFilter.java
/**
 * Replaces the inline {@code style} attribute of the configured tags with an
 * {@code align} attribute.
 * <p>
 * For every element of each tag in {@code tags} that has a non-blank style,
 * the style text is searched for "right", "left" and "center", in that order;
 * the first keyword found becomes the {@code align} value. The {@code style}
 * attribute is then removed whether or not a keyword was found.
 *
 * @param document the document to modify in place
 * @return the same {@code document} instance
 */
@Override
public Document runFilter(Document document) {
    for (String tag : tags) {
        for (Element element : document.getElementsByTag(tag)) {
            String inlineStyle = element.attr("style");
            if (!isNotBlank(inlineStyle)) {
                continue;
            }
            // Order matters: the first matching keyword wins.
            for (String keyword : new String[] {"right", "left", "center"}) {
                if (inlineStyle.contains(keyword)) {
                    element.attr("align", keyword);
                    break;
                }
            }
            element.removeAttr("style");
        }
    }
    return document;
}
From source file:org.ambraproject.wombat.service.remote.EditorialContentApiImpl.java
/** * {@inheritDoc}/*from www . j a va2s .c o m*/ * <p/> * Applies transforms to HTML attributes and performs substitution of placeholder HTML elements with stored content */ @Override public Reader readHtml(final SitePageContext sitePageContext, String pageType, String key, final Set<HtmlElementTransformation> transformations, final Collection<HtmlElementSubstitution> substitutions) throws IOException { Map<String, Object> pageConfig = sitePageContext.getSite().getTheme().getConfigMap(pageType); ContentKey version = ContentKey.createForLatestVersion(key); // TODO May want to support page versioning at some point using fetchHtmlDirective CacheKey cacheKey = CacheKey.create(pageType, key); Number cacheTtl = (Number) pageConfig.get("cacheTtl"); if (cacheTtl != null) { cacheKey = cacheKey.addTimeToLive(cacheTtl.intValue()); } String transformedHtml = requestCachedReader(cacheKey, version, new CacheDeserializer<Reader, String>() { @Override public String read(Reader htmlReader) throws IOException { // It would be nice to feed the reader directly into the parser, but Jsoup's API makes this awkward. // The whole document will be in memory anyway, so buffering it into a string is no great performance loss. String htmlString = IOUtils.toString(htmlReader); Document document = Jsoup.parseBodyFragment(htmlString); for (HtmlElementTransformation transformation : transformations) { transformation.apply(sitePageContext, siteSet, document); } for (HtmlElementSubstitution substitution : substitutions) { substitution.substitute(document); } // We received a snippet, which Jsoup has automatically turned into a complete HTML document. // We want to return only the transformed snippet, so retrieve it from the body tag. return document.getElementsByTag("body").html(); } }); return new StringReader(transformedHtml); }
From source file:org.arb.extractor.DomTreeWalker.java
/** * This method provide TreeWalker the chance to go through all the source code units before * actual resource extraction. In Javascript processing, we collect all identifiers in this * pass./*w w w . ja v a2 s . co m*/ * * @param codeUnit AbstractCodeUnit instance that has all information related to a source file. */ @Override public void preprocessingPass(AbstractCodeUnit codeUnit) { Document doc = codeUnit.getDomDocument(); Elements elements = doc.getElementsByTag("html"); for (int i = 0; i < elements.size(); ++i) { collectIdsOnElement(elements.get(i), codeUnit); } }
From source file:org.arb.extractor.DomTreeWalker.java
/** * Rewrite the source code with resource extracted. * /*from www. j a va 2s . c o m*/ * @param codeUnit AbstractCodeUnit instance that has all information related to a source file. * @return source code after resource replacement. */ @Override public String rewriteSource(AbstractCodeUnit codeUnit) { // Apply all the replacements ArrayList<CodeReplacement> replacementList = codeUnit.getReplacementList(); for (CodeReplacement replacement : replacementList) { DomCodeReplacement domReplacement = (DomCodeReplacement) replacement; if (domReplacement.isNewId()) { if (domReplacement.shouldUseArbId()) { domReplacement.getElement().attr("arb:id", replacement.getResourceId()); } else { domReplacement.getElement().attr("id", replacement.getResourceId()); } } } // return things back Document doc = codeUnit.getDomDocument(); Elements elements = doc.getElementsByTag("html"); return elements.outerHtml(); }