#!/usr/bin/python
# Source: http://andersk.mit.edu/gitweb/jira-zephyrbot.git (jirabot.py,
# a41187bdd525d10fd5a5e1c3b2524736eb761480..f719e4334981f765a44a1ed0f5c7c5ab0312c24d)
"""Relay JIRA issue and comment updates from jira.mit.edu to a zephyr class.

The bot logs in to JIRA with a client certificate (cert.pem in the current
directory), downloads the issue and comment RSS feeds, and sends one zephyr
notice per feed entry that is newer than the timestamp stored in
``time_file``.  The timestamp file doubles as a lock: it is renamed to a
random temporary name while a polling pass is in progress and renamed back
afterwards.
"""
import cStringIO
import calendar
import feedparser
import formatter
import htmllib
import mechanize
import os
import random
import string
import time
import traceback
import urlparse
import zephyr

zephyr_sender = 'jira'
zephyr_class = 'andersk-test'
time_file = 'jirabot.time'

issues_url = "https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000"
comments_url = "https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-comments-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000"

# Marker after which the tail of a comment summary is discarded.
# NOTE(review): the separator literal is empty in the text this file was
# recovered from, and str.rsplit('') raises ValueError ("empty separator"),
# which turned every comment into an error notice.  It looks like a markup
# marker was lost in extraction -- TODO restore the intended value.  While
# it is empty, parse_comment() renders the whole summary.
comment_summary_trailer = ''

# Timestamp window for the current polling pass.  main() sets both before
# calling feed_to_zephyrs(), which skips entries at or before old_time and
# raises new_time to the newest entry time it sees.
old_time = 0
new_time = 0


def jira_init():
    """Return a mechanize.Browser configured for jira.mit.edu.

    robots.txt handling is disabled, cert.pem is registered as both client
    certificate and key for https://idp.mit.edu:9443, and an Accept-Language
    header is set.
    """
    b = mechanize.Browser()
    b.set_handle_robots(False)
    b.add_client_certificate("https://idp.mit.edu:9443", "cert.pem", "cert.pem")
    b.addheaders = [("Accept-Language", "en-us,en;q=0.5"),]
    return b


def jira_login(b):
    """Walk browser *b* through the Touchstone login if one is offered.

    Opens the dashboard and follows the "MIT Touchstone" link.  Returns
    early when that link is absent, or when following it lands back on
    jira.mit.edu (presumably because the session is already authenticated).
    Otherwise submits, in order: the form named "wayfForm1", the form that
    has a control named "login_certificate", and the first form of the
    resulting page.
    """
    b.open("https://jira.mit.edu/jira/secure/Dashboard.jspa")
    try:
        b.follow_link(text="MIT Touchstone")
    except mechanize.LinkNotFoundError:
        return
    if urlparse.urlparse(b.geturl())[1] == "jira.mit.edu":
        return
    b.select_form("wayfForm1")
    b.submit()
    b.select_form(predicate=lambda f: any(c.name == 'login_certificate'
                                          for c in f.controls))
    b.submit()
    b.select_form(nr=0)
    b.submit()


def feed_to_zephyrs(thing, rss, parse):
    """Convert the new entries of an RSS document into zephyr notices.

    thing -- noun used in error messages ('issue' or 'comment').
    rss   -- the raw feed text.
    parse -- callable turning one feed entry into a zephyr.ZNotice.

    Returns a list of (timestamp, notice) pairs.  Entries whose time is not
    later than the global old_time are skipped; the global new_time is
    raised to the latest entry time seen.  An entry that fails to parse
    yields an error notice in its place; a failure of the feed as a whole
    yields one error notice with timestamp 0.
    """
    global old_time, new_time
    zephyrs = []
    try:
        feed = feedparser.parse(rss)
        for e in feed.entries:
            t = int(calendar.timegm(e.date_parsed))
            if t <= old_time:
                continue
            if t > new_time:
                new_time = t
            try:
                z = parse(e)
            except Exception:
                # Report the failure over zephyr instead of dropping the
                # entry; Exception (not a bare except) so Ctrl-C and
                # SystemExit still stop the bot.
                z = zerror("Error parsing " + thing + ":\n" + e.id + "\n" + traceback.format_exc())
            zephyrs.append((t, z))
    except Exception:
        zephyrs.append((0, zerror("Error parsing " + thing + "s feed:\n" + traceback.format_exc())))
    return zephyrs


def _notice(instance, fields):
    """Build an unauthenticated notice on zephyr_class for *instance*."""
    return zephyr.ZNotice(
        sender=zephyr_sender,
        auth=False,
        opcode='auto',
        cls=zephyr_class,
        instance=instance,
        fields=fields,
    )


def parse_issue(e):
    """Return the notice announcing that the issue in entry *e* was updated.

    The zephyr instance is the last path component of the entry id URL.
    """
    issue = urlparse.urlparse(e.id)[2].rsplit('/', 1)[1]
    msg = e.id + "\nThis issue was updated."
    return _notice(issue, [e.title, msg])


def parse_comment(e):
    """Return the notice carrying the comment in entry *e* as plain text.

    The zephyr instance is the last path component of the entry id URL with
    its query and fragment removed.  The HTML summary is rendered to text
    with htmllib before being sent.
    """
    url = urlparse.urlunsplit(urlparse.urlparse(e.id)[0:3] + (None, None))
    issue = url.rsplit('/', 1)[1]

    html = e.summary
    if comment_summary_trailer:
        html = html.rsplit(comment_summary_trailer, 1)[0]

    # NOTE(review): cStringIO cannot hold unicode text that is not plain
    # ASCII; confirm whether summaries with such characters reach this point.
    s = cStringIO.StringIO()
    parser = htmllib.HTMLParser(formatter.AbstractFormatter(formatter.DumbWriter(s)))
    parser.feed(html)
    parser.close()
    comment = s.getvalue()

    msg = e.author + " added a comment:\n" + comment.rstrip()
    return _notice(issue, [e.title, msg])


def zerror(msg):
    """Return a notice on the 'jira-error' instance reporting *msg*."""
    return _notice('jira-error', ['Jira bot error', msg])


def main():
    """Poll JIRA forever, sending zephyrs for entries newer than time_file."""
    global old_time, new_time

    b = jira_init()
    zephyr.init()

    while True:
        # Renaming the timestamp file to a random name acts as the lock:
        # the rename fails if another pass currently holds the file.
        time_file_new = time_file + '.' + ''.join(random.sample(string.ascii_letters, 8))

        try:
            os.rename(time_file, time_file_new)
        except OSError:
            print("warning: could not acquire timestamp lock")
            time.sleep(17)
            continue

        try:
            jira_login(b)
            b.open(issues_url)
            issues_rss = b.response().read()
            b.open(comments_url)
            comments_rss = b.response().read()

            with open(time_file_new) as f:
                old_time = int(f.read())
            new_time = old_time

            zephyrs = (feed_to_zephyrs('issue', issues_rss, parse_issue) +
                       feed_to_zephyrs('comment', comments_rss, parse_comment))

            # The new timestamp is stored before sending, so a failure while
            # sending does not cause the same notices to be sent again.
            with open(time_file_new, 'w') as f:
                f.write(str(new_time))

            zephyrs.sort(key=lambda tz: tz[0])
            for (t, z) in zephyrs:
                z.send()
        finally:
            # Always release the lock; otherwise one failed pass would leave
            # time_file missing and every later pass would fail to acquire it.
            os.rename(time_file_new, time_file)

        time.sleep(60)


if __name__ == '__main__':
    main()