Here you can find the source of getDistinctImageUrls(String htmlContent)
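Finds all image inclusions (looks for `img` tags) and returns only the path or URL of each image; a path shared by several images appears in the list only once.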
Parameter | Description |
---|---|
htmlContent | the html content that may contain image files |
public static List<String> getDistinctImageUrls(String htmlContent)
//package com.java2s; //License from project: Apache License import java.util.ArrayList; import java.util.List; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class Main { private static final String IMG_SELECTOR = "img"; private static final String SRC_ATTR = "src"; /**// w ww. ja va2s.c o m * Finds all image inclusions (looks for <code>img</code> tags). Returns * only the path or URL to the image. If the several images have the same * path, the path is present in the list only one time. * * @param htmlContent * the html content that may contain image files * @return the list of found images (paths only) or empty if nothing found */ public static List<String> getDistinctImageUrls(String htmlContent) { Document doc = Jsoup.parse(htmlContent); Elements els = doc.select(IMG_SELECTOR); List<String> images = new ArrayList<>(els.size()); for (Element e : els) { String path = e.attr(SRC_ATTR); if (!images.contains(path)) { images.add(path); } } return images; } }