Normalizing Character Data Before Output : XML Transform « XML « Python






Normalizing Character Data Before Output

 
import sys
from xml.parsers import expat

def normalize_whitespace(text):
    """Collapse every run of whitespace in *text* to a single space.

    Leading and trailing whitespace is dropped, because ``split()`` with no
    arguments discards empty fields at both ends.
    """
    words = text.split()
    return " ".join(words)

class SimpleParse:
    """Print an XML document's tags and whitespace-normalized text.

    Wraps an expat parser. Character data is buffered as it arrives and is
    flushed, with runs of whitespace collapsed, immediately before each
    start tag or end tag is reported.
    """

    def __init__(self):
        """Create the expat parser and register this object's handlers."""
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.start_element
        self.parser.EndElementHandler = self.end_element
        self.parser.CharacterDataHandler = self.character_data
        # Pending text chunks; joined and cleared by print_cdata().
        self.cdata = []

    def parse(self, file):
        """Parse the XML document read from *file*.

        *file* is handed straight to expat's ``ParseFile``, so it must
        provide a ``read()`` method. On Python 3 expat expects ``read()``
        to return bytes, i.e. the file should be opened in binary mode.
        """
        self.parser.ParseFile(file)

    def print_cdata(self):
        """Print the buffered text, if any is left after normalization,
        then empty the buffer."""
        # Normalize once: the result is already in canonical form, so a
        # second pass over it would be wasted work.
        txt = normalize_whitespace("".join(self.cdata))
        if txt:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(txt)
        self.cdata = []

    def start_element(self, name, attrs):
        """Flush pending text, then report the start tag and its attributes."""
        self.print_cdata()
        print("Start: %s %s" % (name, attrs))

    def character_data(self, data):
        """Buffer one chunk of character data delivered by expat."""
        # expat may deliver one contiguous text run in several calls, so
        # the chunks are only collected here and joined at flush time.
        self.cdata.append(data)

    def end_element(self, name):
        """Flush pending text, then report the end tag."""
        self.print_cdata()
        print("End: %s" % name)

# Parse the XML file named by the first command-line argument.
p = SimpleParse()
# Binary mode hands expat the raw bytes so it can decode them itself; the
# with-block guarantees the file is closed even if parsing raises.
with open(sys.argv[1], "rb") as source:
    p.parse(source)

   
  








Related examples in the same category

1. Transforming an XML Document Using _Document Methods
2. Transforming an XML Document from its Parse Tree