Example usage for org.jsoup.nodes Document getElementsByTag

List of usage examples for org.jsoup.nodes Document getElementsByTag

Introduction

On this page you can find usage examples for the getElementsByTag method of org.jsoup.nodes.Document.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:com.kylemsguy.fishyfishes.MainActivity.java

/**
 * Parses the given markup with Jsoup and concatenates the string form of its {@code <p>}
 * elements, each followed by a newline.
 *
 * <p>Collection starts at index 1, so the first {@code <p>} element found is left out.
 * NOTE(review): whether skipping the first paragraph is deliberate cannot be told from here.
 *
 * @param xmlText markup to parse
 * @return the collected paragraphs, or {@code null} if any exception is thrown while
 *         parsing or collecting (the stack trace is printed in that case)
 */
public String extractParagraphs(String xmlText) {
    StringBuilder collected = new StringBuilder();
    try {
        org.jsoup.nodes.Document parsed = Jsoup.parse(xmlText);
        Elements paragraphs = parsed.getElementsByTag("p");
        for (int index = 1; index < paragraphs.size(); index++) {
            collected.append(paragraphs.get(index).toString()).append('\n');
        }
        return collected.toString();
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

From source file:github.popeen.dsub.fragments.SelectPodcastsFragment.java

/**
 * Tries to turn the URL of a podcast page into the URL of its RSS feed.
 *
 * <p>The host is recognised by a case-insensitive substring check on the URL:
 * <ul>
 *   <li>{@code itunes.apple.com}: the numeric id after {@code /id} is sent to the iTunes lookup
 *       service and the {@code feedUrl} of the first result is used;</li>
 *   <li>{@code soundcloud.com}: the user id is read from the {@code al:android:url} meta tag of
 *       the page and a feeds.soundcloud.com URL is built from it;</li>
 *   <li>{@code player.fm/series}: the {@code href} of the {@code .blatant} element whose text is
 *       "Public Feed" is used;</li>
 *   <li>{@code acast.com} / {@code podbean.com}: the {@code href} of the {@code link} element
 *       with type {@code application/rss+xml} is used.</li>
 * </ul>
 * When several elements on a page match, the last one wins. Every failure is logged and
 * otherwise ignored, so the method is best-effort.
 *
 * @param url the URL entered by the user
 * @return the resolved feed URL, or {@code url} unchanged when nothing could be resolved
 */
private String getRssFeed(String url) {
    String url2 = url;
    try {
        // Lower-case once, with a fixed locale: the default-locale variant maps 'I' to a
        // dotless 'ı' under e.g. a Turkish locale, which would make "ITUNES.APPLE.COM" and
        // similar inputs miss the host checks below.
        final String lowerUrl = url.toLowerCase(java.util.Locale.ROOT);

        if (lowerUrl.contains("itunes.apple.com")) {
            Pattern pattern = Pattern.compile("/id([0-9]*)");
            Matcher matcher = pattern.matcher(url);
            if (matcher.find()) {
                try {
                    String raw = KakaduaUtil.http_get_contents(
                            "https://itunes.apple.com/lookup?id=" + matcher.group(1) + "&entity=podcast");
                    url2 = new JSONObject(raw).getJSONArray("results").getJSONObject(0).getString("feedUrl");
                    Log.w("podcast", url2);
                } catch (Exception e) {
                    Log.w("podcast", e.toString());
                }
            }

        } else if (lowerUrl.contains("soundcloud.com")) {
            try {
                Document doc = Jsoup.connect(url).get();
                Elements metas = doc.getElementsByTag("meta");
                for (Element meta : metas) {
                    if (meta.attr("property").equals("al:android:url")) {
                        String id = meta.attr("content").replace("soundcloud://users:", "");
                        url2 = "https://feeds.soundcloud.com/users/soundcloud:users:" + id + "/sounds.rss";
                        Log.w("podcast", url2);
                    }
                }
            } catch (Exception e) {
                Log.w("podcast", e.toString());
            }
        } else if (lowerUrl.contains("player.fm/series")) {
            try {
                Document doc = Jsoup.connect(url).get();
                Elements links = doc.select(".blatant");
                for (Element link : links) {
                    if (link.text().equals("Public Feed")) {
                        url2 = link.attr("href");
                        Log.w("podcast", url2);
                    }
                }
            } catch (Exception e) {
                Log.w("podcast", e.toString());
            }
        } else if (lowerUrl.contains("acast.com") || lowerUrl.contains("podbean.com")) {
            /*
            TODO:
            This uses a standard tag that most podcasting websites support.
            Always try it when the URL entered is neither a valid feed nor a website with a
            specific conversion above (such as iTunes).
            If it fails, the app must not add the URL to the server and should show an error message.
             */
            try {
                Document doc = Jsoup.connect(url).get();
                Elements links = doc.select("link");
                for (Element link : links) {
                    if (link.attr("type").equals("application/rss+xml")) {
                        url2 = link.attr("href");
                        Log.w("podcast", url2);
                    }
                }
            } catch (Exception e) {
                Log.w("podcast", e.toString());
            }
        }
    } catch (Exception e) {
        // Also reached when url is null (the lower-casing above throws); url2 is returned as is.
        Log.w("podcast", e.toString());
    }
    return url2;
}

From source file:nl.sidn.pcap.ip.GoogleResolverCheck.java

/**
 * Loads the Google resolver address ranges from the URL configured under
 * {@code Settings.RESOLVER_LIST_GOOGLE} and registers them in {@code bit_subnets} and
 * {@code subnets}.
 *
 * <p>The page is only processed when it contains exactly two {@code <pre>} elements; the text
 * of the first one is split into lines, and each line that consists of exactly two
 * space-separated parts contributes its first part as a subnet. Failures are logged and the
 * method returns without throwing.
 */
@Override
protected void init() {
    String url = Settings.getInstance().getSetting(Settings.RESOLVER_LIST_GOOGLE);
    LOGGER.info("Load Google resolver addresses from url: " + url);

    Document doc;
    try {
        doc = Jsoup.connect(url).get();
    } catch (Exception e) {
        // Hand the exception to the logger so the reason for the failed download is not lost.
        LOGGER.error("Problem while getting Google resolvers url: " + url, e);
        return;
    }

    Elements tags = doc.getElementsByTag("pre");
    if (tags.size() == 2) {
        Element resolvers = tags.get(0);
        String[] ips = StringUtils.split(resolvers.text(), '\n');
        for (String ip : ips) {
            String[] parts = StringUtils.split(ip, ' ');
            if (parts.length == 2) {
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Add Google resolver IP range: " + parts[0]);
                }

                try {
                    bit_subnets.add(Subnet.createInstance(parts[0]));
                    subnets.add(parts[0]);
                } catch (UnknownHostException e) {
                    // Log the exception as the cause instead of gluing it onto the message.
                    LOGGER.error("Problem while adding Google resolver IP range: " + parts[0], e);
                }
            }
        }

        if (subnets.size() == 0) {
            LOGGER.error("No Google resolvers found at url: " + url);
        }
    } else {
        LOGGER.error("No Google resolvers found at url: " + url);
    }
}

From source file:no.kantega.publishing.admin.content.htmlfilter.CleanupFormHtmlFilter.java

/**
 * Sets {@code type="text"} on every {@code input} element whose {@code type} attribute is blank.
 *
 * @param document the document to clean up; it is modified in place
 * @return the same document instance
 */
@Override
public Document runFilter(Document document) {
    for (Element field : document.getElementsByTag("input")) {
        if (isBlank(field.attr("type"))) {
            field.attr("type", "text");
        }
    }
    return document;
}

From source file:no.kantega.publishing.admin.content.htmlfilter.ConvertUnderlineToEditorStyleFilter.java

/**
 * Turns every {@code span} whose style declares {@code text-decoration: underline} into a
 * {@code u} element, dropping the span's whole {@code style} attribute.
 *
 * @param document the document to convert; it is modified in place
 * @return the same document instance
 */
@Override
public Document runFilter(Document document) {
    for (Element candidate : document.getElementsByTag("span")) {
        String css = candidate.attr("style");
        // The blank check comes first so the style is only parsed when there is one.
        boolean underlined = isNotBlank(css)
                && "underline".equalsIgnoreCase(getSubAttributeValue(css, "text-decoration"));
        if (underlined) {
            candidate.removeAttr("style");
            candidate.tagName("u");
        }
    }
    return document;
}

From source file:no.kantega.publishing.admin.content.htmlfilter.ImgHeightAndWidthFilter.java

/**
 * Adjusts {@code img} elements whose {@code width}/{@code height} attributes differ from the
 * dimensions of the multimedia object referenced by their {@code src}.
 *
 * <p>For such an image the attributes are replaced by the dimensions computed by
 * {@code imageEditor.getResizedImageDimensions}, and {@code src} is set to the multimedia URL
 * with a {@code width} query parameter appended. Images without both attributes, or whose
 * {@code src} does not yield exactly one multimedia id, are left untouched.
 *
 * @param document the document to process; it is modified in place
 * @return the same document instance
 */
@Override
public Document runFilter(Document document) {
    // Collaborators are looked up from the root context the first time the filter runs.
    if (multimediaDao == null) {
        ApplicationContext appContext = RootContext.getInstance();
        multimediaDao = appContext.getBean(MultimediaDao.class);
        imageEditor = appContext.getBean(ImageEditor.class);
    }

    for (Element img : document.getElementsByTag("img")) {
        String widthAttr = img.attr("width");
        String heightAttr = img.attr("height");
        if (!(isNotBlank(widthAttr) && isNoneBlank(heightAttr))) {
            continue;
        }

        try {
            int requestedWidth = Integer.parseInt(widthAttr);
            int requestedHeight = Integer.parseInt(heightAttr);

            String src = img.attr("src");
            if (src == null) {
                continue;
            }

            List<Integer> ids = MultimediaHelper.getMultimediaIdsFromText(src);
            if (ids.size() != 1) {
                continue;
            }

            int multimediaId = ids.get(0);
            Multimedia image = multimediaDao.getMultimedia(multimediaId);
            boolean alreadyMatches =
                    requestedWidth == image.getWidth() && requestedHeight == image.getHeight();
            if (alreadyMatches) {
                continue;
            }

            MultimediaDimensions d = imageEditor.getResizedImageDimensions(image.getWidth(),
                    image.getHeight(), requestedWidth, requestedHeight);
            img.attr("height", String.valueOf(d.getHeight()));
            img.attr("width", String.valueOf(d.getWidth()));

            String imageUrl = image.getUrl();
            String separator = imageUrl.contains("?") ? "&" : "?";
            img.attr("src", imageUrl + separator + "width=" + d.getWidth());
        } catch (NumberFormatException e) {
            log.error("Could not parse number", e);
        }
    }
    return document;
}

From source file:no.kantega.publishing.admin.content.htmlfilter.ReplaceStyleAlignWithAttributeAlignFilter.java

/**
 * Replaces the {@code style} attribute of the configured tags with an {@code align} attribute.
 *
 * <p>For every element of each tag in {@code tags} that has a non-blank style, {@code align} is
 * set to the first of "right", "left", "center" that occurs anywhere in the style value, and the
 * whole {@code style} attribute is then removed (also when none of the three occurs).
 * NOTE(review): the match is a plain substring test on the complete style value, so it is not
 * limited to a text-align declaration - confirm that this is intended.
 *
 * @param document the document to process; it is modified in place
 * @return the same document instance
 */
@Override
public Document runFilter(Document document) {
    // Order matters: it is the precedence used when several keywords occur in one style.
    final String[] alignments = { "right", "left", "center" };

    for (String tag : tags) {
        for (Element element : document.getElementsByTag(tag)) {
            String style = element.attr("style");
            if (!isNotBlank(style)) {
                continue;
            }
            for (String alignment : alignments) {
                if (style.contains(alignment)) {
                    element.attr("align", alignment);
                    break;
                }
            }
            element.removeAttr("style");
        }
    }
    return document;
}

From source file:org.ambraproject.wombat.service.remote.EditorialContentApiImpl.java

/**
 * {@inheritDoc}/*from   www  .  j a  va2s .c o m*/
 * <p/>
 * Applies transforms to HTML attributes and performs substitution of placeholder HTML elements with stored content
 */
@Override
public Reader readHtml(final SitePageContext sitePageContext, String pageType, String key,
        final Set<HtmlElementTransformation> transformations,
        final Collection<HtmlElementSubstitution> substitutions) throws IOException {
    Map<String, Object> pageConfig = sitePageContext.getSite().getTheme().getConfigMap(pageType);
    ContentKey version = ContentKey.createForLatestVersion(key); // TODO May want to support page versioning at some point using fetchHtmlDirective
    CacheKey cacheKey = CacheKey.create(pageType, key);
    Number cacheTtl = (Number) pageConfig.get("cacheTtl");
    if (cacheTtl != null) {
        cacheKey = cacheKey.addTimeToLive(cacheTtl.intValue());
    }

    String transformedHtml = requestCachedReader(cacheKey, version, new CacheDeserializer<Reader, String>() {
        @Override
        public String read(Reader htmlReader) throws IOException {
            // It would be nice to feed the reader directly into the parser, but Jsoup's API makes this awkward.
            // The whole document will be in memory anyway, so buffering it into a string is no great performance loss.
            String htmlString = IOUtils.toString(htmlReader);
            Document document = Jsoup.parseBodyFragment(htmlString);

            for (HtmlElementTransformation transformation : transformations) {
                transformation.apply(sitePageContext, siteSet, document);
            }
            for (HtmlElementSubstitution substitution : substitutions) {
                substitution.substitute(document);
            }

            // We received a snippet, which Jsoup has automatically turned into a complete HTML document.
            // We want to return only the transformed snippet, so retrieve it from the body tag.
            return document.getElementsByTag("body").html();
        }
    });
    return new StringReader(transformedHtml);
}

From source file:org.arb.extractor.DomTreeWalker.java

/**
 * This method provide TreeWalker the chance to go through all the source code units before
 * actual resource extraction. In Javascript processing, we collect all identifiers in this
 * pass./*w w  w . ja v a2 s  . co m*/
 * 
 * @param codeUnit AbstractCodeUnit instance that has all information related to a source file.
 */
@Override
public void preprocessingPass(AbstractCodeUnit codeUnit) {
    Document doc = codeUnit.getDomDocument();
    Elements elements = doc.getElementsByTag("html");
    for (int i = 0; i < elements.size(); ++i) {
        collectIdsOnElement(elements.get(i), codeUnit);
    }
}

From source file:org.arb.extractor.DomTreeWalker.java

/**
 * Rewrite the source code with resource extracted.
 * /*from www.  j  a  va  2s . c o m*/
 * @param codeUnit AbstractCodeUnit instance that has all information related to a source file.
 * @return source code after resource replacement. 
 */
@Override
public String rewriteSource(AbstractCodeUnit codeUnit) {
    // Apply all the replacements 
    ArrayList<CodeReplacement> replacementList = codeUnit.getReplacementList();
    for (CodeReplacement replacement : replacementList) {
        DomCodeReplacement domReplacement = (DomCodeReplacement) replacement;
        if (domReplacement.isNewId()) {
            if (domReplacement.shouldUseArbId()) {
                domReplacement.getElement().attr("arb:id", replacement.getResourceId());
            } else {
                domReplacement.getElement().attr("id", replacement.getResourceId());
            }
        }
    }

    // return things back
    Document doc = codeUnit.getDomDocument();
    Elements elements = doc.getElementsByTag("html");
    return elements.outerHtml();
}