import sgmllib
import string
# Path of the HTML file that is opened and scanned for its <title> text.
filename = "index.html"
class CleanExit(Exception):
    """Signal raised to stop parsing early; it carries no extra state."""
class Titlefinder(sgmllib.SGMLParser):
    """SGML parser that captures the text inside a <title> element.

    ``data`` is None until a <title> start tag has been seen, after which
    it is a list of the text chunks received so far.  ``title`` is None
    until the matching end tag is handled, at which point it holds the
    joined text and parsing is aborted by raising ``CleanExit``.
    """

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.title = self.data = None

    def start_title(self, attributes):
        """Start collecting character data; ``attributes`` is ignored."""
        self.data = []

    def end_title(self):
        """Store the collected text in ``title`` and raise ``CleanExit``."""
        # str.join replaces the deprecated string.join(words, sep) helper;
        # the result is the same string.
        self.title = "".join(self.data)
        raise CleanExit

    def handle_data(self, data):
        """Append ``data`` to the buffer once a <title> tag has opened it."""
        if self.data is not None:
            self.data.append(data)
def get_title(filehandle):
    """Return the title text found in ``filehandle``, or None.

    The input is read in 1024-character chunks and fed to a Titlefinder.
    When the parser raises CleanExit, the title it stored is returned.
    If the input is exhausted without that happening, None is returned.
    """
    finder = Titlefinder()
    try:
        chunk = filehandle.read(1024)
        while chunk:
            finder.feed(chunk)
            chunk = filehandle.read(1024)
        # Input exhausted: let the parser process whatever is still buffered.
        finder.close()
    except CleanExit:
        return finder.title
    else:
        return None
# Extract the title of the configured file and report it.
filehandle = open(filename)
try:
    title = get_title(filehandle)
finally:
    # Release the file even when parsing raises.
    filehandle.close()
# Parenthesised single argument: prints the same text under Python 2 and
# is also valid Python 3 syntax.
print("The page's title is: %s" % (title))
20.7. sgmllib |
| 20.7.1. | Use SGML parser |