+class UnicodeHTMLParser(htmllib.HTMLParser):
+ entitydefs = dict((k, unichr(v)) for (k, v) in htmlentitydefs.name2codepoint.items())
+
+ def convert_charref(self, name):
+ try:
+ n = int(name)
+ except ValueError:
+ return
+ return self.convert_codepoint(n)
+
+ def convert_codepoint(self, codepoint):
+ return unichr(codepoint)
+