xref: /netbsd-src/external/bsd/ipf/dist/lib/load_http.c (revision 13885a665959c62f13a82b3caedf986eaa17aa31)
1*13885a66Sdarrenr /*	$NetBSD: load_http.c,v 1.3 2012/07/22 14:27:36 darrenr Exp $	*/
2bc4097aaSchristos 
3bc4097aaSchristos /*
4*13885a66Sdarrenr  * Copyright (C) 2012 by Darren Reed.
5bc4097aaSchristos  *
6bc4097aaSchristos  * See the IPFILTER.LICENCE file for details on licencing.
7bc4097aaSchristos  *
8*13885a66Sdarrenr  * Id: load_http.c,v 1.1.1.2 2012/07/22 13:44:39 darrenr
9bc4097aaSchristos  */
10bc4097aaSchristos 
11bc4097aaSchristos #include "ipf.h"
12bc4097aaSchristos #include <ctype.h>
13bc4097aaSchristos 
/*
 * Because the URL can be included twice into the buffer, once as the
 * full path for the "GET" and once as the "Host:", the buffer it is
 * put in needs to be larger than 512*2 to make room for the supporting
 * text. Why not just use snprintf and truncate? The warning about the
 * URL being too long tells you something is wrong and does not fetch
 * any data - just truncating the URL (with snprintf, etc) and sending
 * that to the server would allow an unknown and unintended action
 * to happen.
 */
24bc4097aaSchristos #define	MAX_URL_LEN	512
25bc4097aaSchristos #define	LOAD_BUFSIZE	(MAX_URL_LEN * 2 + 128)
26bc4097aaSchristos 
27bc4097aaSchristos /*
28bc4097aaSchristos  * Format expected is one addres per line, at the start of each line.
29bc4097aaSchristos  */
30bc4097aaSchristos alist_t *
load_http(char * url)31bc4097aaSchristos load_http(char *url)
32bc4097aaSchristos {
33bc4097aaSchristos 	int fd, len, left, port, endhdr, removed, linenum = 0;
34bc4097aaSchristos 	char *s, *t, *u, buffer[LOAD_BUFSIZE], *myurl;
35bc4097aaSchristos 	alist_t *a, *rtop, *rbot;
36fe7112e3Schristos 	int rem;
37bc4097aaSchristos 
38bc4097aaSchristos 	/*
39bc4097aaSchristos 	 * More than this would just be absurd.
40bc4097aaSchristos 	 */
41bc4097aaSchristos 	if (strlen(url) > MAX_URL_LEN) {
42bc4097aaSchristos 		fprintf(stderr, "load_http has a URL > %d bytes?!\n",
43bc4097aaSchristos 			MAX_URL_LEN);
44bc4097aaSchristos 		return NULL;
45bc4097aaSchristos 	}
46bc4097aaSchristos 
47bc4097aaSchristos 	fd = -1;
48bc4097aaSchristos 	rtop = NULL;
49bc4097aaSchristos 	rbot = NULL;
50bc4097aaSchristos 
51bc4097aaSchristos 	myurl = strdup(url);
52bc4097aaSchristos 	if (myurl == NULL)
53bc4097aaSchristos 		goto done;
54bc4097aaSchristos 
55fe7112e3Schristos 	rem = sizeof(buffer);
56fe7112e3Schristos 	left = snprintf(buffer, rem, "GET %s HTTP/1.0\r\n", url);
57fe7112e3Schristos 	if (left < 0 || left > rem)
58fe7112e3Schristos 		goto done;
59fe7112e3Schristos 	rem -= left;
60fe7112e3Schristos 
61bc4097aaSchristos 	s = myurl + 7;			/* http:// */
62bc4097aaSchristos 	t = strchr(s, '/');
63bc4097aaSchristos 	if (t == NULL) {
64bc4097aaSchristos 		fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
65fe7112e3Schristos 		goto done;
66bc4097aaSchristos 	}
67bc4097aaSchristos 	*t++ = '\0';
68bc4097aaSchristos 
69bc4097aaSchristos 	/*
70bc4097aaSchristos 	 * 10 is the length of 'Host: \r\n\r\n' below.
71bc4097aaSchristos 	 */
72bc4097aaSchristos 	if (strlen(s) + strlen(buffer) + 10 > sizeof(buffer)) {
73bc4097aaSchristos 		fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
74bc4097aaSchristos 		free(myurl);
75bc4097aaSchristos 		return NULL;
76bc4097aaSchristos 	}
77bc4097aaSchristos 
78bc4097aaSchristos 	u = strchr(s, '@');
79bc4097aaSchristos 	if (u != NULL)
80bc4097aaSchristos 		s = u + 1;		/* AUTH */
81bc4097aaSchristos 
82fe7112e3Schristos 	left = snprintf(buffer + left, rem, "Host: %s\r\n\r\n", s);
83fe7112e3Schristos 	if (left < 0 || left > rem)
84fe7112e3Schristos 		goto done;
85fe7112e3Schristos 	rem -= left;
86bc4097aaSchristos 
87bc4097aaSchristos 	u = strchr(s, ':');
88bc4097aaSchristos 	if (u != NULL) {
89bc4097aaSchristos 		*u++ = '\0';
90bc4097aaSchristos 		port = atoi(u);
91bc4097aaSchristos 		if (port < 0 || port > 65535)
92bc4097aaSchristos 			goto done;
93bc4097aaSchristos 	} else {
94bc4097aaSchristos 		port = 80;
95bc4097aaSchristos 	}
96bc4097aaSchristos 
97bc4097aaSchristos 
98bc4097aaSchristos 	fd = connecttcp(s, port);
99bc4097aaSchristos 	if (fd == -1)
100bc4097aaSchristos 		goto done;
101bc4097aaSchristos 
102bc4097aaSchristos 	len = strlen(buffer);
103fe7112e3Schristos 	if (write(fd, buffer, len) != len)
104bc4097aaSchristos 		goto done;
105bc4097aaSchristos 
106bc4097aaSchristos 	s = buffer;
107bc4097aaSchristos 	endhdr = 0;
108bc4097aaSchristos 	left = sizeof(buffer) - 1;
109bc4097aaSchristos 
110bc4097aaSchristos 	while ((len = read(fd, s, left)) > 0) {
111bc4097aaSchristos 		s[len] = '\0';
112bc4097aaSchristos 		left -= len;
113bc4097aaSchristos 		s += len;
114bc4097aaSchristos 
115bc4097aaSchristos 		if (endhdr >= 0) {
116bc4097aaSchristos 			if (endhdr == 0) {
117bc4097aaSchristos 				t = strchr(buffer, ' ');
118bc4097aaSchristos 				if (t == NULL)
119bc4097aaSchristos 					continue;
120bc4097aaSchristos 				t++;
121bc4097aaSchristos 				if (*t != '2')
122bc4097aaSchristos 					break;
123bc4097aaSchristos 			}
124bc4097aaSchristos 
125bc4097aaSchristos 			u = buffer;
126bc4097aaSchristos 			while ((t = strchr(u, '\r')) != NULL) {
127bc4097aaSchristos 				if (t == u) {
128bc4097aaSchristos 					if (*(t + 1) == '\n') {
129bc4097aaSchristos 						u = t + 2;
130bc4097aaSchristos 						endhdr = -1;
131bc4097aaSchristos 						break;
132bc4097aaSchristos 					} else
133bc4097aaSchristos 						t++;
134bc4097aaSchristos 				} else if (*(t + 1) == '\n') {
135bc4097aaSchristos 					endhdr++;
136bc4097aaSchristos 					u = t + 2;
137bc4097aaSchristos 				} else
138bc4097aaSchristos 					u = t + 1;
139bc4097aaSchristos 			}
140bc4097aaSchristos 			if (endhdr >= 0)
141bc4097aaSchristos 				continue;
142bc4097aaSchristos 			removed = (u - buffer) + 1;
143bc4097aaSchristos 			memmove(buffer, u, (sizeof(buffer) - left) - removed);
144bc4097aaSchristos 			s -= removed;
145bc4097aaSchristos 			left += removed;
146bc4097aaSchristos 		}
147bc4097aaSchristos 
148bc4097aaSchristos 		do {
149bc4097aaSchristos 			t = strchr(buffer, '\n');
150bc4097aaSchristos 			if (t == NULL)
151bc4097aaSchristos 				break;
152bc4097aaSchristos 
153bc4097aaSchristos 			linenum++;
154bc4097aaSchristos 			*t = '\0';
155bc4097aaSchristos 
156fe7112e3Schristos 			for (u = buffer; isdigit((unsigned char)*u) ||
157fe7112e3Schristos 			    (*u == '.'); u++)
158fe7112e3Schristos 				continue;
159fe7112e3Schristos 			if (*u == '/') {
160fe7112e3Schristos 				char *slash;
161fe7112e3Schristos 
162fe7112e3Schristos 				slash = u;
163fe7112e3Schristos 				u++;
164fe7112e3Schristos 				while (isdigit((unsigned char)*u))
165fe7112e3Schristos 					u++;
166fe7112e3Schristos 				if (!isspace((unsigned char)*u) && *u)
167fe7112e3Schristos 					u = slash;
168fe7112e3Schristos 			}
169fe7112e3Schristos 
170bc4097aaSchristos 			/*
171bc4097aaSchristos 			 * Remove comment and continue to the next line if
172bc4097aaSchristos 			 * the comment is at the start of the line.
173bc4097aaSchristos 			 */
174bc4097aaSchristos 			u = strchr(buffer, '#');
175bc4097aaSchristos 			if (u != NULL) {
176bc4097aaSchristos 				*u = '\0';
177fe7112e3Schristos 				if (u == buffer)
178bc4097aaSchristos 					continue;
179bc4097aaSchristos 			}
180bc4097aaSchristos 
181bc4097aaSchristos 			/*
182bc4097aaSchristos 			 * Trim off tailing white spaces, will include \r
183bc4097aaSchristos 			 */
184bc4097aaSchristos 			for (u = t - 1; (u >= buffer) && ISSPACE(*u); u--)
185bc4097aaSchristos 				*u = '\0';
186bc4097aaSchristos 
187bc4097aaSchristos 			a = alist_new(AF_UNSPEC, buffer);
188bc4097aaSchristos 			if (a != NULL) {
189bc4097aaSchristos 				if (rbot != NULL)
190bc4097aaSchristos 					rbot->al_next = a;
191bc4097aaSchristos 				else
192bc4097aaSchristos 					rtop = a;
193bc4097aaSchristos 				rbot = a;
194bc4097aaSchristos 			} else {
195bc4097aaSchristos 				fprintf(stderr,
196bc4097aaSchristos 					"%s:%d unrecognised content:%s\n",
197bc4097aaSchristos 					url, linenum, buffer);
198bc4097aaSchristos 			}
199bc4097aaSchristos 
200bc4097aaSchristos 			t++;
201bc4097aaSchristos 			removed = t - buffer;
202bc4097aaSchristos 			memmove(buffer, t, sizeof(buffer) - left - removed);
203bc4097aaSchristos 			s -= removed;
204bc4097aaSchristos 			left += removed;
205bc4097aaSchristos 
206bc4097aaSchristos 		} while (1);
207bc4097aaSchristos 	}
208bc4097aaSchristos 
209bc4097aaSchristos done:
210bc4097aaSchristos 	if (myurl != NULL)
211bc4097aaSchristos 		free(myurl);
212bc4097aaSchristos 	if (fd != -1)
213bc4097aaSchristos 		close(fd);
214bc4097aaSchristos 	return rtop;
215bc4097aaSchristos }
216