X-Git-Url: http://andersk.mit.edu/gitweb/jira-zephyrbot.git/blobdiff_plain/0ebeacca656f0754e2800752465f1eaf332bd971..HEAD:/jirabot.py

diff --git a/jirabot.py b/jirabot.py
index 7c0c7d1..5495e5a 100755
--- a/jirabot.py
+++ b/jirabot.py
@@ -1,13 +1,14 @@
 #!/usr/bin/python
-import cStringIO
 import calendar
 import feedparser
 import formatter
+import htmlentitydefs
 import htmllib
 import mechanize
 import os
 import random
 import string
+import StringIO
 import time
 import traceback
 import urlparse
@@ -17,6 +18,19 @@ zephyr_sender = 'jira'
 zephyr_class = 'andersk-test'
 time_file = 'jirabot.time'
 
+class UnicodeHTMLParser(htmllib.HTMLParser):
+    """HTMLParser that turns named entities and numeric character references into unicode."""
+    entitydefs = dict((k, unichr(v)) for (k, v) in htmlentitydefs.name2codepoint.items())
+
+    def convert_charref(self, name):
+        try:
+            return self.convert_codepoint(int(name))
+        except (ValueError, OverflowError):
+            return None  # not a number, or a code point unichr() cannot represent
+
+    def convert_codepoint(self, codepoint):
+        return unichr(codepoint)
+
 def jira_init():
     b = mechanize.Browser()
     b.set_handle_robots(False)
@@ -40,6 +54,26 @@ def jira_login(b):
     b.select_form(nr=0)
     b.submit()
 
+def feed_to_zephyrs(thing, rss, parse):
+    """Return (time, notice) pairs for entries of `rss` newer than old_time, raising
+    the global new_time to the newest entry time; failures become zerror notices."""
+    global old_time, new_time
+    zephyrs = []
+    try:
+        for e in feedparser.parse(rss).entries:
+            t = int(calendar.timegm(e.date_parsed))
+            if t <= old_time:
+                continue
+            new_time = max(new_time, t)
+            try:
+                z = parse(e)
+            except Exception:
+                z = zerror("Error parsing " + thing + ":\n" + e.id + "\n" + traceback.format_exc())
+            zephyrs.append((t, z))
+    except Exception:
+        zephyrs.append((0, zerror("Error parsing " + thing + "s feed:\n" + traceback.format_exc())))
+    return zephyrs
+
 def parse_issue(e):
     issue = urlparse.urlparse(e.id)[2].rsplit('/', 1)[1]
     url = e.id
@@ -48,6 +82,7 @@ def parse_issue(e):
     return zephyr.ZNotice(
         sender=zephyr_sender,
         auth=False,
+        opcode='auto',
         cls=zephyr_class,
         instance=issue,
         fields=[e.title, msg],
@@ -57,18 +92,18 @@ def parse_comment(e):
     url = urlparse.urlunsplit(urlparse.urlparse(e.id)[0:3] + (None,None))
     issue = url.rsplit('/', 1)[1]
 
-    s = cStringIO.StringIO()
-    parser = htmllib.HTMLParser(formatter.AbstractFormatter(formatter.DumbWriter(s)))
+    s = StringIO.StringIO()
+    parser = UnicodeHTMLParser(formatter.AbstractFormatter(formatter.DumbWriter(s)))
     parser.feed(e.summary.rsplit('<table>', 1)[0])
     parser.close()
-    s.seek(0)
-    comment = s.read()
+    comment = s.getvalue()
 
     msg = e.author + " added a comment:\n" + comment.rstrip()
 
     return zephyr.ZNotice(
         sender=zephyr_sender,
         auth=False,
+        opcode='auto',
         cls=zephyr_class,
         instance=issue,
         fields=[e.title, msg],
@@ -78,54 +113,49 @@ def zerror(msg):
     return zephyr.ZNotice(
         sender=zephyr_sender,
         auth=False,
+        opcode='auto',
         cls=zephyr_class,
         instance='jira-error',
         fields=['Jira bot error', msg]
     )
 
 b = jira_init()
-
-jira_login(b)
-b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
-issues_rss = b.response().read()
-b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-comments-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
-comments_rss = b.response().read()
-
-time_file_new = time_file + '.' + ''.join(random.sample(string.letters, 8))
-
-try:
-    os.rename(time_file, time_file_new)
-except OSError:
-    exit()
-
-old_time = int(open(time_file_new).read())
-new_time = old_time
-
 zephyr.init()
-zephyrs = []
-
-for (thing, rss, parse) in [('issue', issues_rss, parse_issue),
-                            ('comment', comments_rss, parse_comment)]:
-    try:
-        feed = feedparser.parse(rss)
-        for e in feed.entries:
-            t = int(calendar.timegm(e.date_parsed))
-            if t <= old_time:
-                continue
-            if t > new_time:
-                new_time = t
-            try:
-                z = parse(e)
-            except:
-                z = zerror("Error parsing " + thing + ":\n" + e.id + "\n" + traceback.format_exc())
-            zephyrs.append((t, z))
-    except:
-        zephyrs.append((0, zerror("Error parsing " + thing + "s feed:\n" + traceback.format_exc())))
 
-open(time_file_new, 'w').write(str(new_time))
+count = 0
 
-os.rename(time_file_new, time_file)
+while True:
+    time_file_new = time_file + '.' + ''.join(random.sample(string.letters, 8))
 
-zephyrs.sort(key=lambda tz: tz[0])
-for (t, z) in zephyrs:
-    z.send()
+    try:
+        os.rename(time_file, time_file_new)
+    except OSError:
+        print "warning: could not acquire timestamp lock"
+        time.sleep(17)
+        continue
+
+    # We now hold the timestamp file; the finally below renames it back even if
+    # login, fetching or sending fails, so an error cannot leave it locked.
+    try:
+        if count >= 200:
+            b = jira_init()
+            count = 0
+        count += 1
+
+        jira_login(b)
+        b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
+        issues_rss = b.response().read()
+        b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-comments-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
+        comments_rss = b.response().read()
+        b.clear_history()
+        # The stored time is rewritten only after every notice has been sent.
+        old_time = int(open(time_file_new).read())
+        new_time = old_time
+        zephyrs = (feed_to_zephyrs('issue', issues_rss, parse_issue) +
+                   feed_to_zephyrs('comment', comments_rss, parse_comment))
+        for (t, z) in sorted(zephyrs, key=lambda tz: tz[0]):
+            z.send()
+        open(time_file_new, 'w').write(str(new_time))
+    finally:
+        os.rename(time_file_new, time_file)
+    time.sleep(20)