Java examples for HTML:JSoup
Parse HTML loaded from the web using jsoup
import java.net.URL; import java.io.File; import java.io.IOException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.parser.Parser; import org.jsoup.select.Elements; import org.jsoup.nodes.Element; public class ParseWiki { public static void main(String[] args) throws Exception {//w w w .j a va 2s.com parseInfoBox(); test(); parsing(); } public static void test() throws IOException { Document doc = Jsoup.connect("http://en.wikipedia.org/wiki/Boston").get(); Element link = doc.select("a").first(); String text = doc.body().text(); // "An example link" String linkHref = link.attr("href"); // "http://example.com/" String linkText = link.text(); // "example"" String linkOuterH = link.outerHtml(); String linkInnerH = link.html(); // "<b>example</b>" } public static void parsing() throws Exception { Document doc = Jsoup.connect("http://en.wikipedia.org/wiki/boston").get(); Element contentDiv = doc.select("div[id=mw-content-text] > p").first(); String paragraph = contentDiv.text(); System.out.println(paragraph); // The result } public static void parseInfoBox() throws Exception { Document doc2 = Jsoup.connect("http://en.wikipedia.org/wiki/Tom_Cruise").get(); Element body = doc2.body(); Elements tables = body.getElementsByTag("table"); for (Element table : tables) { if (table.className().contains("infobox")==true) { System.out.println(table.outerHtml()); table.outerHtml(); break; } } } }