Java examples for HTML:JSoup
Parse a web page using jsoup
package util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Example that fetches a web page with jsoup and collects the list items found
 * inside {@code div#cat_page} into a map of item text to the {@code href} of
 * the item's first child element.
 */
public class ParsePage {

    /**
     * Reports whether {@code c} is an ASCII letter or an ASCII digit.
     *
     * <p>Despite the name, digits {@code '0'..'9'} are accepted as well as
     * letters; non-ASCII letters are not.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is in {@code a-z}, {@code A-Z} or {@code 0-9}
     */
    public static boolean isAlpha(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9');
    }

    /**
     * Downloads {@code url} and maps the text of every {@code <li>} inside
     * {@code div#cat_page} to the {@code href} attribute of that item's first
     * child element.
     *
     * <p>If the item text ends with a character rejected by {@link #isAlpha(char)},
     * that single trailing character is removed before the text is used as a key.
     * Items with empty text or without any child element are skipped. The inner
     * HTML of the matched container and the number of list items are printed to
     * standard output.
     *
     * @param url the address of the page to fetch
     * @return a map from item text to link target; empty if the page could not be
     *         fetched or contained no usable items
     */
    public static HashMap<String, String> parsePage(String url) {
        HashMap<String, String> map = new HashMap<String, String>();
        try {
            Document doc = Jsoup.connect(url).get();

            // Select the container once and search it directly rather than
            // serialising it to HTML and parsing that text a second time.
            Elements container = doc.select("div#cat_page");
            System.out.println(container.html());

            Elements listItems = container.select("li");
            System.out.println("list items:" + listItems.size());

            for (Element item : listItems) {
                String word = item.text();
                // An item with no text has no last character to inspect, and an
                // item with no child element has no link to read; skip both
                // instead of failing with an index-out-of-bounds exception.
                if (word.isEmpty() || item.children().isEmpty()) {
                    continue;
                }
                String wordURL = item.child(0).attr("href");
                int lastIndex = word.length() - 1;
                if (!isAlpha(word.charAt(lastIndex))) {
                    word = word.substring(0, lastIndex);
                }
                map.put(word, wordURL);
            }
        } catch (IOException e) {
            // Best effort: report the failure and fall through to return what
            // has been collected (normally an empty map).
            System.err.println("Failed to fetch " + url + ": " + e);
        }
        return map;
    }

    /**
     * Runs the parser against a fixed URL, prints each collected
     * {@code text link} pair and finally the number of entries.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = "http://www.java2s.com";
        Map<String, String> map = parsePage(url);
        for (Map.Entry<String, String> entry : map.entrySet()) {
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
        System.out.println(map.size());
    }
}