#!/usr/bin/python2.3
"""Copy HTML files, passing eligible text through ``acronize``.

Each file named on the command line is parsed and re-emitted, piece by
piece, to a sibling file whose name is the original name prefixed with
'A'.  Markup is written back out as-is; text is handed to
``xmllookup.acronize`` unless it sits inside an element listed in
``CustomizedParser.NOSUB_TAGS``.
"""
import os
import re
import sys

try:
    from HTMLParser import HTMLParser  # Python 2 module name
except ImportError:
    from html.parser import HTMLParser  # Python 3 module name

from xmllookup import acronize


class CustomizedParser(HTMLParser):
    """HTML parser that re-serialises its input to an output file.

    Call setOutfileName() before feed() and closeOutfile() afterwards.
    """

    # Text inside any of these elements is written through untouched.
    NOSUB_TAGS = ('head', 'abbr', 'acronym')

    def __init__(self):
        try:
            # Newer parsers convert character references into plain text
            # by default, which would bypass handle_charref() and
            # handle_entityref(); switch that off so references are
            # written back verbatim.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older parsers do not know the keyword.
            HTMLParser.__init__(self)
        self.OutfileName = None
        self.Outfile = None
        self.CANSUB = 0
        self.tagsoup = []

    def setOutfileName(self, argOutfileName):
        """Open argOutfileName for writing and reset the parsing state.

        Raises whatever open() raises if the file cannot be created.
        """
        self.OutfileName = argOutfileName
        self.Outfile = open(self.OutfileName, "w")
        # Substitution stays off until the first start tag has been seen.
        self.CANSUB = 0
        self.tagsoup = []

    def closeOutfile(self):
        """Close the output file opened by setOutfileName()."""
        self.Outfile.close()

    def write(self, argString):
        """Write argString to the output file."""
        self.Outfile.write(argString)

    def _updateCanSub(self):
        """Set CANSUB to 1 unless a NOSUB_TAGS element is currently open."""
        for tag in self.NOSUB_TAGS:
            if tag in self.tagsoup:
                self.CANSUB = 0
                return
        self.CANSUB = 1

    def _formatAttrs(self, argAttrs):
        """Serialise a list of (name, value) pairs back into attribute text.

        A value of None marks an attribute written without a value
        (e.g. ``<input disabled>``); it is emitted as the bare name.
        The parser hands over values with entity references already
        replaced, so '&' and '"' are escaped again to keep the output
        well-formed.
        """
        parts = []
        for key, value in argAttrs:
            if value is None:
                parts.append(' %s' % key)
            else:
                value = value.replace('&', '&amp;').replace('"', '&quot;')
                parts.append(' %s="%s"' % (key, value))
        return "".join(parts)

    def handle_starttag(self, argTag, argAttrs):
        """Record the opened element and write its start tag.

        argAttrs is a list of tuples.
        Each tuple is a pair of (attribute_name, attribute_value)
        """
        self.tagsoup.append(argTag)
        self._updateCanSub()
        self.write("<%s%s>" % (argTag, self._formatAttrs(argAttrs)))

    def handle_startendtag(self, argTag, argAttrs):
        """Write a self-closing tag; the open-element stack is unaffected.

        argAttrs is a list of tuples.
        Each tuple is a pair of (attribute_name, attribute_value)
        """
        self.write("<%s%s/>" % (argTag, self._formatAttrs(argAttrs)))

    def handle_endtag(self, argTag):
        """Unwind the open-element stack and write the end tag."""
        # Drop the innermost matching element together with anything
        # opened after it: tag soup often leaves elements unclosed.
        # An end tag with no matching start tag leaves the stack alone.
        for index in range(len(self.tagsoup) - 1, -1, -1):
            if self.tagsoup[index] == argTag:
                del self.tagsoup[index:]
                break
        self._updateCanSub()
        self.write("</%s>" % argTag)

    def handle_data(self, argString):
        """Write text, passing it through acronize() when CANSUB is set."""
        if self.CANSUB:
            # acronize() comes from xmllookup; presumably it marks up
            # known acronyms in the text -- confirm against that module.
            argString = acronize(argString)
        self.write(argString)

    def handle_charref(self, argString):
        """Write a numeric character reference back unchanged."""
        self.write("&#%s;" % argString)

    def handle_entityref(self, argString):
        """Write a named entity reference back unchanged."""
        self.write("&%s;" % argString)

    def handle_comment(self, argString):
        """Write a comment back with its delimiters restored."""
        self.write("<!--%s-->" % argString)

    def handle_decl(self, argString):
        """Write a declaration (e.g. a DOCTYPE) back with its delimiters."""
        self.write("<!%s>" % argString)

    def handle_pi(self, argString):
        """Write a processing instruction back with its delimiters."""
        # The parser passes everything between '<?' and '>', so any
        # trailing '?' is already part of argString.
        self.write("<?%s>" % argString)


def main(myInfileName, myOutfileName):
    """Parse myInfileName and write the processed copy to myOutfileName.

    Both files are closed even if reading or parsing raises.
    """
    myInfile = open(myInfileName, "r")
    try:
        myContents = myInfile.read()
    finally:
        myInfile.close()

    myParser = CustomizedParser()
    myParser.setOutfileName(myOutfileName)
    try:
        myParser.feed(myContents)
    finally:
        myParser.closeOutfile()


def dq(s):
    """Enclose a string argument in double quotes"""
    return '"' + s + '"'


if __name__ == "__main__":
    for path in sys.argv[1:]:
        head, tail = os.path.split(path)
        # The output lands next to the input, named with an 'A' prefix.
        main(path, os.path.join(head, 'A' + tail))