1 // url.c -- Object representing uniform resource locators
2 // Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License version 2 as
6 // published by the Free Software Foundation.
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
13 // You should have received a copy of the GNU General Public License along
14 // with this program; if not, write to the Free Software Foundation, Inc.,
15 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 // In addition to these license terms, the author grants the following
20 // If you modify this program, or any covered work, by linking or
21 // combining it with the OpenSSL project's OpenSSL library (or a
22 // modified version of that library), containing parts covered by the
23 // terms of the OpenSSL or SSLeay licenses, the author
24 // grants you additional permission to convey the resulting work.
25 // Corresponding Source for a non-source form of such a combination
26 // shall include the source code for the parts of OpenSSL used as well
27 // as that of the covered work.
29 // You may at your option choose to remove this additional permission from
30 // the work, or from any part of it.
32 // It is possible to build this program in a way that it loads OpenSSL
33 // libraries at run-time. If doing so, the following notices are required
34 // by the OpenSSL and SSLeay licenses:
36 // This product includes software developed by the OpenSSL Project
37 // for use in the OpenSSL Toolkit. (http://www.openssl.org/)
39 // This product includes cryptographic software written by Eric Young
40 // (eay@cryptsoft.com)
43 // The most up-to-date version of this program is always available from
44 // http://shellinabox.com
48 #define _XOPEN_SOURCE 500
54 #include <strings.h> // for strncasecmp()
57 #include "libhttp/url.h"
59 #include "logging/logging.h"
// Decodes URL-encoded "%XX" escape sequences found in the string "s".
// NOTE(review): this excerpt omits several lines of the function (the branch
// that precedes the '%' case, the reads of c1/c2, and the write-back and
// return), so the comments below only describe the statements that are
// visible here.
61 static char *urlUnescape(char *s) {
64 for (char *u = s; *u; ) {
68 } else if (ch == '%') {
// First hex digit: '0'-'9' is accepted as is; otherwise bit 0x20 is cleared
// (folding lower case onto upper case) and 'A'-'F' is accepted.
70 if ((c1 >= '0' && c1 <= '9') || ((c1 &= ~0x20) >= 'A' && c1 <= 'F')) {
71 ch = c1 - (c1 > '9' ? 'A' - 10 : '0');
// Second hex digit: same test. The value of the first digit is shifted into
// the high nibble and the second digit supplies the low nibble.
73 if ((c2 >= '0' && c2 <= '9') || ((c2 &= ~0x20) >= 'A' && c2 <= 'F')) {
74 ch = (ch << 4) + c2 - (c2 > '9' ? 'A' - 10 : '0');
// "warned" is post-incremented in the condition, so the warning is emitted
// only for the first malformed escape.
76 } else if (!warned++) {
77 warn("Malformed URL encoded data \"%s\"", r);
79 } else if (!warned++) {
80 warn("Malformed URL encoded data \"%s\"", r);
// Per-entry destructor callback; this file passes it to initHashMap() for
// every hash map it creates.
// NOTE(review): the body is not visible in this excerpt; presumably it
// releases "key" and "value" -- confirm.
89 static void urlDestroyHashMapEntry(void *arg, char *key, char *value) {
// Builds a heap-allocated string from the "len" bytes starting at "buf".
// Callers in this file pass slices of a larger buffer that are not
// NUL-terminated.
// NOTE(review): only the allocation is visible in this excerpt; presumably
// the bytes are copied and a terminating NUL is appended -- confirm.
95 static char *urlMakeString(const char *buf, int len) {
// One extra byte is reserved beyond "len"; check() wraps the allocation.
100 check(s = malloc(len + 1));
// Splits "query" ("len" bytes of "key=value&key=value...") into key/value
// pairs and adds each pair to url->args.
// NOTE(review): some lines of this function are not visible in this excerpt
// (e.g. how "key"/"value" are reset after a pair, and any unescaping).
107 static void urlParseQueryString(struct URL *url, const char *query, int len) {
108 const char *key = query;
109 const char *value = NULL;
// The loop deliberately runs one step past the last byte: on that final
// step "len" is negative and "ch" is forced to NUL, so the last pair is
// flushed without reading beyond the buffer.
110 for (const char *ampersand = query; len-- >= 0; ampersand++) {
111 char ch = len >= 0 ? *ampersand : '\000';
// Only the first '=' of a pair starts the value; later ones belong to it.
112 if (ch == '=' && !value) {
113 value = ampersand + 1;
114 } else if (ch == '&' || len < 0) {
// Key length excludes the '=' separator when a value is present.
115 int kl = (value ? value-1 : ampersand) - key;
116 int vl = value ? ampersand - value : 0;
118 char *k = urlMakeString(key, kl);
122 v = urlMakeString(value, vl);
125 addToHashMap(&url->args, k, v);
// Parses a ';'-separated header value of the form "key; key=value; ..."
// (at most "len" bytes of "s") and adds every field to "hashmap". A NULL
// "s" or a non-positive "len" results in no work being done.
// NOTE(review): some lines of this function are not visible in this excerpt.
136 static void urlParseHeaderLine(struct HashMap *hashmap, const char *s,
138 while (s && len > 0) {
// Skip separators and padding in front of the next field.
139 while (len > 0 && (*s == ' ' || *s == ';')) {
144 const char *value = NULL;
// Scan to the end of the field, remembering where the first '=' was seen.
145 while (len > 0 && *s != ';') {
146 if (*s == '=' && value == NULL) {
// Key length excludes the '=' separator when a value is present.
152 int kl = (value ? value-1 : s) - key;
153 int vl = value ? s - value : 0;
155 char *k = urlMakeString(key, kl);
// NOTE(review): the statement applied to matching characters is not visible
// here. Lookups elsewhere in this file use lower-case keys ("form-data",
// "boundary", ...); if this loop is meant to lower-case the key, the range
// tested should presumably be 'A'..'Z' rather than 'a'..'z' -- verify.
156 for (char *t = k; *t; t++) {
157 if (*t >= 'a' && *t <= 'z') {
// A value enclosed in double quotes is detected here.
163 if (vl >= 2 && value[0] == '"' && value[vl-1] == '"') {
167 v = urlMakeString(value, vl);
169 addToHashMap(hashmap, k, v);
// Finds the first occurrence of the NUL-terminated string "s" inside the
// buffer "buf", which is "len" bytes long and need not be NUL-terminated.
//
// Returns a pointer to the start of the match inside "buf", or NULL if "s"
// does not occur. An empty "s" matches at the start of the buffer.
static const char *urlMemstr(const char *buf, int len, const char *s) {
  int sLen = strlen(s);
  if (!sLen) {
    return buf;
  }
  while (len >= sLen) {
    // A match may start at any of the first (len - sLen + 1) bytes. The
    // scan has to include the final candidate position; otherwise a needle
    // located at the very end of the buffer (e.g. a trailing "\r\n") would
    // never be found.
    const char *first = memchr(buf, *s, len - sLen + 1);
    if (!first) {
      return NULL;
    }
    len -= (int)(first - buf);
    buf = first;
    // At this point at least sLen bytes remain, so the compare is in bounds.
    if (!memcmp(buf, s, sLen)) {
      return buf;
    }
    buf++;
    len--;
  }
  return NULL;
}
// Tests whether the buffer "buf" ("len" bytes, not necessarily
// NUL-terminated) starts with the NUL-terminated string "s"; the visible
// path returns the memcmp() result over the length of "s" (0 on a match).
// NOTE(review): the handling of "len" shorter than "s" is not visible in
// this excerpt.
197 static int urlMemcmp(const char *buf, int len, const char *s) {
198 int sLen = strlen(s);
202 return memcmp(buf, s, sLen);
// Case-insensitive counterpart of urlMemcmp(): the visible path returns the
// strncasecmp() result over the length of "s" (0 on a match).
// NOTE(review): the handling of "len" shorter than "s" is not visible in
// this excerpt.
206 static int urlMemcasecmp(const char *buf, int len, const char *s) {
207 int sLen = strlen(s);
211 return strncasecmp(buf, s, sLen);
// Parses one part of a "multipart/form-data" body ("len" bytes at "buf"):
// walks the CRLF-terminated lines, picks the field name out of the
// "Content-Disposition" header, and adds a name/value entry to url->args.
// NOTE(review): several lines of this function are not visible in this
// excerpt (e.g. the test that separates headers from payload and the
// clean-up of "name").
215 static void urlParsePart(struct URL *url, const char *buf, int len) {
216 // Most browsers seem to forget quoting data in the header fields. This
217 // means, it is quite possible for an HTML form to cause the submission of
218 // unparseable "multipart/form-data". If this happens, we just give up
219 // and ignore the malformed data.
221 // <form method="POST" enctype="multipart/form-data">
222 // <input type="file" name="" X: x="">
223 // <input type="submit">
// Iterate for as long as another CRLF can be found in the remaining data.
226 for (const char *eol; !!(eol = urlMemstr(buf, len, "\r\n")); ) {
// The remaining bytes become the value; a zero-length remainder is stored
// as a NULL value.
231 char *value = len ? urlMakeString(buf, len) : NULL;
232 addToHashMap(&url->args, name, value);
// Only the first Content-Disposition header is considered (name not set yet).
237 if (!name && !urlMemcasecmp(buf, len, "content-disposition:")) {
238 struct HashMap fields;
239 initHashMap(&fields, urlDestroyHashMapEntry, NULL);
// 20 is the length of the "content-disposition:" prefix that is skipped.
240 urlParseHeaderLine(&fields, buf + 20, eol - buf - 20);
241 if (getRefFromHashMap(&fields, "form-data")) {
242 // We currently don't bother to deal with binary files (e.g. files
243 // that include NUL characters). If this ever becomes necessary,
244 // we could check for the existence of a "filename" field and use
245 // that as an indicator to store the payload in something other
247 name = (char *)getFromHashMap(&fields, "name");
// Take a private copy, because "fields" is destroyed right below.
249 check(name = strdup(name));
252 destroyHashMap(&fields);
// Advance past the current line and its two-byte CRLF terminator.
254 len -= eol - buf + 2;
// Parses the body of a POST request ("len" bytes at "buf") according to the
// request's "content-type" header and stores the resulting fields in
// url->args. "application/x-www-form-urlencoded" bodies are handled like a
// query string; "multipart/form-data" bodies are split at the boundary
// delimiters and each part is handed to urlParsePart().
// NOTE(review): several lines of this function are not visible in this
// excerpt, so the exact control flow between the statements below cannot be
// fully confirmed from here.
261 static void urlParsePostBody(struct URL *url,
262 const struct HttpConnection *http,
263 const char *buf, int len) {
264 struct HashMap contentType;
265 initHashMap(&contentType, urlDestroyHashMapEntry, NULL);
266 const char *ctHeader = getFromHashMap(&http->header, "content-type");
// A missing header is passed on as NULL with length 0.
267 urlParseHeaderLine(&contentType, ctHeader, ctHeader ? strlen(ctHeader) : 0);
268 if (getRefFromHashMap(&contentType, "application/x-www-form-urlencoded")) {
269 urlParseQueryString(url, buf, len);
270 } else if (getRefFromHashMap(&contentType, "multipart/form-data")) {
271 const char *boundary = getFromHashMap(&contentType, "boundary");
272 if (boundary && *boundary) {
273 const char *lastPart = NULL;
274 for (const char *part = buf; len > 0; ) {
// At the very start of the body the delimiter may appear as a bare "--";
// everywhere else it has to be preceded by CRLF. Accordingly, either 2 or 4
// bytes are skipped to get past the delimiter prefix.
276 if ((part == buf && (ptr = urlMemstr(part, len, "--")) != NULL) ||
277 (ptr = urlMemstr(part, len, "\r\n--")) != NULL) {
278 len -= ptr - part + (part == buf ? 2 : 4);
279 part = ptr + (part == buf ? 2 : 4);
280 if (!urlMemcmp(part, len, boundary)) {
281 int i = strlen(boundary);
// Boundary followed by CRLF.
284 if (!urlMemcmp(part, len, "\r\n")) {
// The data between the previous delimiter and this one forms one part.
288 urlParsePart(url, lastPart, ptr - lastPart);
291 info("Ignoring prologue before \"multipart/form-data\"");
// Boundary followed by "--" and CRLF.
295 } else if (!urlMemcmp(part, len, "--\r\n")) {
298 urlParsePart(url, lastPart, ptr - lastPart);
301 info("Ignoring epilogue past end of \"multipart/"
309 warn("Missing final \"boundary\" for \"multipart/form-data\"");
312 warn("Missing \"boundary\" information for \"multipart/form-data\"");
315 destroyHashMap(&contentType);
// Allocates a struct URL on the heap and initializes it with initURL() from
// the given connection and request body ("len" bytes at "buf").
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new object is returned -- confirm.
318 struct URL *newURL(const struct HttpConnection *http,
319 const char *buf, int len) {
321 check(url = malloc(sizeof(struct URL)));
322 initURL(url, http, buf, len);
// Initializes "url" from the HTTP connection "http": copies protocol, host,
// port, path, path info and query from the connection, then fills url->args
// from the query string (GET) or from the request body "buf"/"len" (POST).
// NOTE(review): some field initializations are not visible in this excerpt.
// NOTE(review): unlike other allocations in this file, the strdup() results
// below are not wrapped in check(), and the httpGet*() accessors are assumed
// to return non-NULL strings -- verify.
326 void initURL(struct URL *url, const struct HttpConnection *http,
327 const char *buf, int len) {
328 url->protocol = strdup(httpGetProtocol(http));
330 url->password = NULL;
331 url->host = strdup(httpGetHost(http));
332 url->port = httpGetPort(http);
333 url->path = strdup(httpGetPath(http));
334 url->pathinfo = strdup(httpGetPathInfo(http));
335 url->query = strdup(httpGetQuery(http));
338 initHashMap(&url->args, urlDestroyHashMapEntry, NULL);
// The request method is compared exactly (case-sensitive); methods other
// than GET and POST are not handled by the branches visible here.
339 if (!strcmp(http->method, "GET")) {
340 urlParseQueryString(url, url->query, strlen(url->query));
341 } else if (!strcmp(http->method, "POST")) {
342 urlParsePostBody(url, http, buf, len);
// Releases the resources held by "url"; the visible part destroys the
// argument hash map.
// NOTE(review): the remaining clean-up (presumably freeing the string
// fields) is not visible in this excerpt.
346 void destroyURL(struct URL *url) {
357 destroyHashMap(&url->args);
// NOTE(review): the body is not visible in this excerpt; presumably this is
// the counterpart of newURL() that destroys and frees "url" -- confirm.
361 void deleteURL(struct URL *url) {
// Returns the protocol string stored in "url". The pointer refers to memory
// held by the URL object.
366 const char *urlGetProtocol(struct URL *url) {
367 return url->protocol;
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the user name component stored in "url" -- confirm.
370 const char *urlGetUser(struct URL *url) {
// Returns the password stored in "url"; initURL() sets this field to NULL.
374 const char *urlGetPassword(struct URL *url) {
375 return url->password;
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the host string stored in "url" -- confirm.
378 const char *urlGetHost(struct URL *url) {
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the port number stored in "url" -- confirm.
382 int urlGetPort(struct URL *url) {
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the path string stored in "url" -- confirm.
386 const char *urlGetPath(struct URL *url) {
// Returns the path info string stored in "url". The pointer refers to
// memory held by the URL object.
390 const char *urlGetPathInfo(struct URL *url) {
391 return url->pathinfo;
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the query string stored in "url" -- confirm.
394 const char *urlGetQuery(struct URL *url) {
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the anchor (fragment) component stored in "url" -- confirm.
398 const char *urlGetAnchor(struct URL *url) {
// Assembles "protocol://host[:port]path" into a heap buffer stored in
// url->url. The port is only included when it differs from the default
// port (80 for "http", 443 for any other protocol).
// NOTE(review): some lines are not visible in this excerpt (presumably a
// guard that builds the string only once, the initial NUL-termination of
// the buffer, and the return statement) -- confirm.
402 const char *urlGetURL(struct URL *url) {
404 const char *host = urlGetHost(url);
// Size budget: 8 bytes for the protocol plus "://", 25 bytes for ":port".
// NOTE(review): the 8-byte budget only holds for protocols of up to five
// characters; sizing from strlen(url->protocol) would be more robust.
405 int s_size = 8 + strlen(host) + 25 + strlen(url->path);
406 check(*(char **)&url->url = malloc(s_size + 1));
// NOTE(review): the third argument of strncat() limits the number of bytes
// appended from the source, not the total size of the destination, so
// passing s_size here does not bound the write to the buffer.
408 strncat(url->url, url->protocol, s_size);
409 strncat(url->url, "://", s_size);
410 strncat(url->url, host, s_size);
411 if (url->port != (strcmp(url->protocol, "http") ? 443 : 80)) {
// strrchr() with '\000' yields the current end of the string, so the port
// is written directly after the host.
412 snprintf(strrchr(url->url, '\000'), 25, ":%d", url->port);
414 strncat(url->url, url->path, s_size);
419 const struct HashMap *urlGetArgs(struct URL *url) {