X-Git-Url: http://andersk.mit.edu/gitweb/jira-zephyrbot.git/blobdiff_plain/a41187bdd525d10fd5a5e1c3b2524736eb761480..f719e4334981f765a44a1ed0f5c7c5ab0312c24d:/jirabot.py

diff --git a/jirabot.py b/jirabot.py
old mode 100644
new mode 100755
index 45eeb4d..b9b5be4
--- a/jirabot.py
+++ b/jirabot.py
@@ -1,75 +1,142 @@
-import zephyr
+#!/usr/bin/python
+import cStringIO
+import calendar
 import feedparser
-import pycurl
-from BeautifulSoup import BeautifulSoup
-
-"""Screen scrapes jira.mit.edu. Supposedly. Doesn't actually work yet.
-
-You need to have a personal certificate named cert.pem in this
-directory to run this script. You also need the abovementioned scripts.
-The zephyr bindings can be gotten from ebroder's GitHub repository
-located: http://github.com/ebroder/python-zephyr/tree/master
-
-SSL server CA authentication is disabled because we don't know how to
-make curl stop complaining.
-
-Current difficulty is we are causing a NullPointer exception on
-(we think) touchstone's servers."""
-
-class Browser(object):
-    def __init__(self, **kwargs):
-        self.opts = kwargs
-        self.lastResult = None
-    def _callback(self, buf):
-        self.lastResult += buf
-    def get(self, url):
-        c = pycurl.Curl()
-        c.setopt(c.URL, url)
-        c.setopt(c.WRITEFUNCTION, self._callback)
-        for k,v in self.opts.items():
-            c.setopt(getattr(c, k), v)
-        self.lastResult = ''
-        c.perform()
-        return self.lastResult
-    def getPage(self, url):
-        return Page(url, self.get(url), self)
-
-class Page(object):
-    def __init__(self, url, contents, browser):
-        self.url = url
-        self.soup = BeautifulSoup(contents)
-        self.browser = browser
-    def submitForm(self, name, args ={}):
-        form = self.soup.find(name='form', attrs={"name": name})
-        if form == None: return False
-        dest = form.get('action', self.url)
-        # this has a lot of edge-cases that don't work
-        inputs = form.findAll(name='input')
-        vals = {}
-        for input in inputs:
-            vals[input[name]] = input[value]
-        for k,v in args.items():
-            vals[k] = v
-        return self.browser.getPage()
-
-
-b = Browser(
-    SSLCERT = "cert.pem",
-    SSLKEY = "cert.pem",
-    SSL_VERIFYPEER = 0,
-    COOKIEJAR = "jirabot.cookie",
-    FOLLOWLOCATION = 1,
-    AUTOREFERER = 1,
-    HTTPHEADER = ["Accept-Language: en-us,en;q=0.5"],
-)
-
-b.get("https://jira.mit.edu/jira/secure/Dashboard.jspa")
-page = b.getPage("https://jira.mit.edu/jira/secure/mit_login.jsp?os_destination=/secure/Dashboard.jspa")
-
-print page.submitForm('wayfForm1')
-
-#c.setopt(c.URL, "https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
-
-#d = feedparser.parse(t.contents)
-#print d
+import formatter
+import htmllib
+import mechanize
+import os
+import random
+import string
+import time
+import traceback
+import urlparse
+import zephyr
+
+zephyr_sender = 'jira'  # sender set on every ZNotice this script builds
+zephyr_class = 'andersk-test'  # zephyr class (cls=) every notice is sent to
+time_file = 'jirabot.time'  # holds the integer timestamp of the newest entry already handled
+
+def jira_init():  # build and return the mechanize.Browser the main loop uses
+    browser = mechanize.Browser()
+    browser.set_handle_robots(False)  # ignore robots.txt
+    browser.add_client_certificate("https://idp.mit.edu:9443", "cert.pem", "cert.pem")
+    browser.addheaders = [("Accept-Language", "en-us,en;q=0.5")]  # replaces the default header list
+    return browser
+
+def jira_login(b):
+    """Walk browser b through the MIT Touchstone login for Jira, returning early if none is offered."""
+    b.open("https://jira.mit.edu/jira/secure/Dashboard.jspa")
+    try:
+        b.follow_link(text="MIT Touchstone")
+    except mechanize.LinkNotFoundError:
+        return  # no "MIT Touchstone" link on the dashboard (presumably already logged in)
+    if urlparse.urlparse(b.geturl())[1] == "jira.mit.edu":
+        return  # following the link ended up back on jira.mit.edu
+    b.select_form("wayfForm1")
+    b.submit()
+    b.select_form(predicate=lambda form: any(ctl.name == 'login_certificate' for ctl in form.controls))
+    b.submit()
+    b.select_form(nr=0)  # first form on the resulting page
+    b.submit()
+
+def feed_to_zephyrs(thing, rss, parse):
+    """Convert rss entries newer than old_time to (time, ZNotice) pairs via parse(entry)."""
+    global old_time, new_time  # new_time is raised to the newest entry time seen
+    zephyrs = []
+    # Failures become error notices (labelled with thing) instead of propagating;
+    # "except Exception" still lets KeyboardInterrupt and SystemExit through.
+    try:
+        for e in feedparser.parse(rss).entries:
+            t = int(calendar.timegm(e.date_parsed))
+            if t <= old_time:
+                continue
+            new_time = max(new_time, t)
+            try:
+                z = parse(e)
+            except Exception:
+                z = zerror("Error parsing " + thing + ":\n" + e.id + "\n" + traceback.format_exc())
+            zephyrs.append((t, z))
+    except Exception:
+        zephyrs.append((0, zerror("Error parsing " + thing + "s feed:\n" + traceback.format_exc())))
+    return zephyrs
+
+def parse_issue(e):
+    """Build the ZNotice announcing that the issue in feed entry e was updated."""
+    # The last path component of the entry's id URL is used as the zephyr instance.
+    issue = urlparse.urlparse(e.id)[2].rsplit('/', 1)[1]
+    msg = e.id + "\nThis issue was updated."
+    return zephyr.ZNotice(
+        sender=zephyr_sender,
+        auth=False,
+        opcode='auto',
+        cls=zephyr_class,
+        instance=issue,
+        fields=[e.title, msg],
+    )
+
+def parse_comment(e):
+    """Build the ZNotice announcing the comment described by feed entry e."""
+    # Keep scheme, host and path of the entry id; the last path part is the instance.
+    issue_url = urlparse.urlunsplit(urlparse.urlparse(e.id)[0:3] + (None, None))
+    issue = issue_url.rsplit('/', 1)[1]
+
+    # Render the HTML summary, minus everything from its last <table> on, as text.
+    text_out = cStringIO.StringIO()
+    writer = formatter.DumbWriter(text_out)
+    html = htmllib.HTMLParser(formatter.AbstractFormatter(writer))
+    html.feed(e.summary.rsplit('<table>', 1)[0])
+    html.close()
+    msg = e.author + " added a comment:\n" + text_out.getvalue().rstrip()
+    return zephyr.ZNotice(
+        sender=zephyr_sender,
+        auth=False,
+        opcode='auto',
+        cls=zephyr_class,
+        instance=issue,
+        fields=[e.title, msg],
+    )
+
+def zerror(msg):
+    """Return a ZNotice that reports msg on the 'jira-error' instance."""
+    # The first field is a fixed title; msg is the second field.
+    error_fields = ['Jira bot error', msg]
+    return zephyr.ZNotice(
+        sender=zephyr_sender, auth=False, opcode='auto',
+        cls=zephyr_class, instance='jira-error',
+        fields=error_fields
+    )
+
+b = jira_init()
+zephyr.init()
+
+# Poll forever.  Renaming time_file to a random name doubles as a lock: if the
+# file is not there to rename, the pass is skipped and retried 17 seconds later.
+while True:
+    time_file_new = time_file + '.' + ''.join(random.sample(string.letters, 8))
+    try:
+        os.rename(time_file, time_file_new)
+    except OSError:
+        print "warning: could not acquire timestamp lock"
+        time.sleep(17)
+        continue
+    # Give the file its name back even when a fetch or send raises; otherwise
+    # the error would leave it renamed and every later run stuck at the warning.
+    try:
+        jira_login(b)
+        b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
+        issues_rss = b.response().read()
+        b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-comments-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
+        comments_rss = b.response().read()
+        old_time = int(open(time_file_new).read())
+        new_time = old_time
+        zephyrs = (feed_to_zephyrs('issue', issues_rss, parse_issue) +
+                   feed_to_zephyrs('comment', comments_rss, parse_comment))
+        # The new timestamp is written before the notices are sent, oldest first.
+        open(time_file_new, 'w').write(str(new_time))
+        zephyrs.sort(key=lambda tz: tz[0])
+        for (t, z) in zephyrs:
+            z.send()
+    finally:
+        os.rename(time_file_new, time_file)
 
+    time.sleep(60)