andersk Git - jira-zephyrbot.git/commitdiff
Make HTMLParser handle Unicode.
author Anders Kaseorg <andersk@mit.edu>
Wed, 18 Feb 2009 21:05:14 +0000 (16:05 -0500)
committer Anders Kaseorg <andersk@mit.edu>
Wed, 18 Feb 2009 21:05:14 +0000 (16:05 -0500)
jirabot.py

index 8c2372114b45270522ee4f23cd3e746a5d2d8362..2fee41dd45cbc2e9637005ff554d51ca8aeff4c5 100755 (executable)
@@ -1,13 +1,14 @@
 #!/usr/bin/python
-import cStringIO
 import calendar
 import feedparser
 import formatter
+import htmlentitydefs
 import htmllib
 import mechanize
 import os
 import random
 import string
+import StringIO
 import time
 import traceback
 import urlparse
@@ -17,6 +18,19 @@ zephyr_sender = 'jira'
 zephyr_class = 'andersk-test'
 time_file = 'jirabot.time'
 
+class UnicodeHTMLParser(htmllib.HTMLParser):
+    entitydefs = dict((k, unichr(v)) for (k, v) in htmlentitydefs.name2codepoint.items())
+
+    def convert_charref(self, name):
+        try:
+            n = int(name)
+        except ValueError:
+            return
+        return self.convert_codepoint(n)
+
+    def convert_codepoint(self, codepoint):
+        return unichr(codepoint)
+
 def jira_init():
     b = mechanize.Browser()
     b.set_handle_robots(False)
@@ -78,12 +92,11 @@ def parse_comment(e):
     url = urlparse.urlunsplit(urlparse.urlparse(e.id)[0:3] + (None,None))
     issue = url.rsplit('/', 1)[1]
 
-    s = cStringIO.StringIO()
-    parser = htmllib.HTMLParser(formatter.AbstractFormatter(formatter.DumbWriter(s)))
+    s = StringIO.StringIO()
+    parser = UnicodeHTMLParser(formatter.AbstractFormatter(formatter.DumbWriter(s)))
     parser.feed(e.summary.rsplit('<table>', 1)[0])
     parser.close()
-    s.seek(0)
-    comment = s.read()
+    comment = s.getvalue()
 
     msg = e.author + " added a comment:\n" + comment.rstrip()
 
This page took 0.292135 seconds and 5 git commands to generate.