Java examples for HTML:JSoup
Simple web crawler using jsoup: fetches a page, lists its links, and saves the page and every linked page as HTML files
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Minimal one-level crawler built on jsoup.
 *
 * <p>Fetches a start page, saves its body, prints every {@code a[href]} link found on it,
 * then fetches each linked page and saves its body to a file named after the link text.
 */
public class WebCrawler {

    /** Placeholder start URL used when no URL is passed on the command line. */
    private static final String DEFAULT_START_URL = "http://your server/";

    /** File that receives the body of the start page. */
    private static final String INDEX_FILE = "D:/Documents/abc.html";

    /** Directory that receives one file per linked page. */
    private static final String PAGE_DIR = "D:/";

    /** Timeout passed to every jsoup connection, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Maximum length of a link label, both when printed and when used as a file name. */
    private static final int MAX_LABEL_WIDTH = 35;

    private static final String USER_AGENT =
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) "
                    + "Chrome/19.0.1042.0 Safari/535.21";

    /** Characters that are not allowed in Windows file names, plus control characters. */
    private static final Pattern ILLEGAL_FILE_CHARS = Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    /** Prints {@code msg} formatted with {@code args} on its own line to standard output. */
    private static void print(String msg, Object... args) {
        System.out.println(String.format(msg, args));
    }

    /**
     * Shortens {@code s} to at most {@code width} characters; a shortened string ends in
     * {@code "."} to mark the cut.
     */
    private static String trim(String s, int width) {
        if (s.length() > width) {
            return s.substring(0, width - 1) + ".";
        }
        return s;
    }

    /**
     * Crawls one level deep from the start page.
     *
     * @param args optional; {@code args[0]} overrides the start URL
     * @throws IOException if the start page cannot be fetched or saved; failures on individual
     *     linked pages are reported on standard error and do not stop the crawl
     */
    public static void main(String[] args) throws IOException {
        String startUrl = args.length > 0 ? args[0] : DEFAULT_START_URL;

        Document startPage = Jsoup.connect(startUrl).timeout(TIMEOUT_MILLIS).get();
        Elements links = startPage.select("a[href]");
        save(new File(INDEX_FILE), startPage);

        print("\nLinks: (%d)", links.size());
        for (Element link : links) {
            print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), MAX_LABEL_WIDTH));
        }

        Set<String> usedNames = new HashSet<>();
        int failures = 0;
        for (Element link : links) {
            String url = link.attr("abs:href");
            // "abs:href" is empty when the href cannot be resolved, and links such as
            // mailto: or javascript: cannot be fetched over HTTP; skip both.
            if (!isHttpUrl(url)) {
                continue;
            }
            try {
                Document page = Jsoup.connect(url)
                        .timeout(TIMEOUT_MILLIS)
                        .userAgent(USER_AGENT)
                        .ignoreHttpErrors(true)
                        .followRedirects(true)
                        .ignoreContentType(true)
                        .get();
                String name = uniqueName(fileNameFor(link.text()), usedNames);
                save(new File(PAGE_DIR, name + ".html"), page);
            } catch (IOException | IllegalArgumentException e) {
                // One unreachable or malformed link must not abort the remaining downloads.
                failures++;
                System.err.println("Skipping <" + url + ">: " + e);
            }
        }

        // Non-zero exit status when at least one linked page could not be stored.
        System.exit(failures == 0 ? 0 : 1);
    }

    /** Returns whether {@code url} starts with the http or https scheme (case-insensitive). */
    private static boolean isHttpUrl(String url) {
        String lower = url.toLowerCase(Locale.ROOT);
        return lower.startsWith("http://") || lower.startsWith("https://");
    }

    /**
     * Turns link text into a file name: shortened like the printed label, with characters that
     * are illegal in file names replaced by {@code '_'}; blank text becomes {@code "page"}.
     */
    private static String fileNameFor(String linkText) {
        String shortened = trim(linkText, MAX_LABEL_WIDTH);
        String name = ILLEGAL_FILE_CHARS.matcher(shortened).replaceAll("_").trim();
        return name.isEmpty() ? "page" : name;
    }

    /**
     * Returns {@code base}, or {@code base-2}, {@code base-3}, ... if the name was already
     * handed out, so two links with the same text do not overwrite each other's file.
     * Names are compared case-insensitively and recorded in {@code used}.
     */
    private static String uniqueName(String base, Set<String> used) {
        String candidate = base;
        for (int n = 2; !used.add(candidate.toLowerCase(Locale.ROOT)); n++) {
            candidate = base + "-" + n;
        }
        return candidate;
    }

    /**
     * Writes the body of {@code doc} to {@code target} as UTF-8, creating missing parent
     * directories first.
     *
     * @throws IOException if a directory cannot be created or the file cannot be written
     */
    private static void save(File target, Document doc) throws IOException {
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create directory " + parent);
        }

        // Defensive: fall back to the whole document if there is no body element.
        Element body = doc.body();
        String html = body != null ? body.toString() : doc.outerHtml();

        // try-with-resources closes the file even when the write fails.
        try (Writer out = new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8)) {
            out.write(html);
        }
    }
}