]>
Commit | Line | Data |
---|---|---|
b9916fe5 EY |
1 | import zephyr |
2 | import feedparser | |
3 | import pycurl | |
a41187bd | 4 | from BeautifulSoup import BeautifulSoup |
b9916fe5 EY |
5 | |
6 | """Screen scrapes jira.mit.edu. Supposedly. Doesn't actually work yet. | |
7 | ||
8 | You need to have a personal certificate named cert.pem in this | |
9 | directory to run this script. You also need the modules imported above. | |
10 | The zephyr bindings can be obtained from ebroder's GitHub repository, | |
11 | located at http://github.com/ebroder/python-zephyr/tree/master | |
12 | ||
13 | SSL server CA authentication is disabled because we don't know how to | |
14 | make curl stop complaining. | |
15 | ||
16 | Current difficulty is we are causing a NullPointer exception on | |
17 | (we think) touchstone's servers.""" | |
18 | ||
class Browser(object):
    """Minimal pycurl-backed HTTP client that reuses one set of options.

    Keyword arguments given to the constructor are pycurl option names
    (for example ``FOLLOWLOCATION=1``).  Each name is looked up on the
    Curl handle with ``getattr`` and applied to every request made
    through :meth:`get`.
    """

    def __init__(self, **kwargs):
        # Option-name -> value mapping applied to every Curl handle.
        self.opts = kwargs
        # Body of the most recent response; None until something is fetched.
        self.lastResult = None

    def _callback(self, buf):
        """pycurl write callback: append the received chunk to the body."""
        if not self.lastResult:
            # First chunk (or nothing stored yet): adopt the chunk as-is so
            # the accumulator takes the chunk's own type (str or bytes)
            # instead of failing on ``None + buf`` or ``'' + b'...'``.
            self.lastResult = buf
        else:
            self.lastResult += buf

    def get(self, url):
        """Fetch *url* and return the response body.

        The body is also stored in ``self.lastResult``.  Any error raised
        by pycurl propagates to the caller; the Curl handle is closed
        either way.
        """
        c = pycurl.Curl()
        try:
            c.setopt(c.URL, url)
            c.setopt(c.WRITEFUNCTION, self._callback)
            for k, v in self.opts.items():
                c.setopt(getattr(c, k), v)
            self.lastResult = ''
            c.perform()
        finally:
            # Release the handle deterministically instead of relying on
            # garbage collection, including when perform() raises.
            c.close()
        return self.lastResult

    def getPage(self, url):
        """Fetch *url* and wrap the response in a Page tied to this browser."""
        return Page(url, self.get(url), self)
36 | ||
class Page(object):
    """A fetched HTML page: its URL, its parsed contents and its browser."""

    def __init__(self, url, contents, browser):
        self.url = url
        self.soup = BeautifulSoup(contents)
        self.browser = browser

    def submitForm(self, name, args=None):
        """Submit the form called *name* and return the resulting page.

        The values of the form's ``<input>`` elements are collected,
        overridden by any entries in *args*, URL-encoded and appended as a
        query string to the form's ``action`` (resolved against this
        page's URL; the page URL itself when the form has no action).
        The resulting URL is fetched with ``self.browser.getPage``.

        Limitations: the request is always a GET regardless of the form's
        ``method``, and only ``<input>`` elements are considered.

        Returns ``False`` when the page has no form with that name,
        otherwise whatever ``self.browser.getPage`` returns.
        """
        try:
            from urllib.parse import urlencode, urljoin
        except ImportError:  # Python 2
            from urllib import urlencode
            from urlparse import urljoin

        form = self.soup.find(name='form', attrs={"name": name})
        if form is None:
            return False
        dest = urljoin(self.url, form.get('action', self.url))
        # this has a lot of edge-cases that don't work
        vals = {}
        for field in form.findAll(name='input'):
            field_name = field.get('name')
            if field_name is None:
                # Inputs without a name are not part of a form submission.
                continue
            vals[field_name] = field.get('value', '')
        # Caller-supplied values take precedence over the form's defaults.
        vals.update(args or {})
        if vals:
            separator = '&' if '?' in dest else '?'
            dest += separator + urlencode(vals)
        return self.browser.getPage(dest)
54 | ||
b9916fe5 EY |
55 | |
# Keyword names are pycurl option names; Browser applies them to every request.
b = Browser(
    SSLCERT = "cert.pem",
    SSLKEY = "cert.pem",
    # Server CA verification is deliberately disabled (see the note at the
    # top of this file).
    SSL_VERIFYPEER = 0,
    # COOKIEJAR only saves cookies; COOKIEFILE is what makes a later request
    # (which uses a fresh Curl handle) load and send them again.
    COOKIEJAR = "jirabot.cookie",
    COOKIEFILE = "jirabot.cookie",
    FOLLOWLOCATION = 1,
    AUTOREFERER = 1,
    HTTPHEADER = ["Accept-Language: en-us,en;q=0.5"],
)

# Hit the dashboard first, then load the login page.
b.get("https://jira.mit.edu/jira/secure/Dashboard.jspa")
page = b.getPage("https://jira.mit.edu/jira/secure/mit_login.jsp?os_destination=/secure/Dashboard.jspa")

# Single-argument call form prints the same thing on Python 2 and Python 3.
print(page.submitForm('wayfForm1'))
b9916fe5 EY |
70 | |
71 | #c.setopt(c.URL, "https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000") | |
72 | ||
73 | #d = feedparser.parse(t.contents) | |
74 | #print d | |
75 |