Parsing the HTML of a Web Page
import htmllib, urllib, formatter, sys
def parse(url, formatter):
    """Fetch the document at *url* and run it through an HTML parser.

    The whole response body is read into memory and then fed to an
    ``htmllib.HTMLParser`` constructed with *formatter*.

    Args:
        url: Location passed unchanged to ``urllib.urlopen``.
        formatter: Formatter object handed to ``htmllib.HTMLParser``.
            NOTE: inside this function the name shadows the module-level
            ``formatter`` module; the parameter name is kept so existing
            callers (including keyword callers) keep working.

    Returns:
        None. Any output is whatever the parser emits through *formatter*.
    """
    f = urllib.urlopen(url)
    try:
        data = f.read()
    finally:
        # Release the handle even if read() raises; try/finally is used
        # because the object returned by Python 2's urlopen is not a
        # context manager.
        f.close()

    p = htmllib.HTMLParser(formatter)
    p.feed(data)
    # close() makes the parser process any data it is still buffering.
    p.close()
# Build the output chain in two steps: a DumbWriter bound to standard
# output, wrapped by an AbstractFormatter, then parse the document.
writer = formatter.DumbWriter(sys.stdout)
fmt = formatter.AbstractFormatter(writer)
parse("index.htm", fmt)
Related examples in the same category