1*13885a66Sdarrenr /* $NetBSD: load_http.c,v 1.3 2012/07/22 14:27:36 darrenr Exp $ */
2bc4097aaSchristos
3bc4097aaSchristos /*
4*13885a66Sdarrenr * Copyright (C) 2012 by Darren Reed.
5bc4097aaSchristos *
6bc4097aaSchristos * See the IPFILTER.LICENCE file for details on licencing.
7bc4097aaSchristos *
8*13885a66Sdarrenr * Id: load_http.c,v 1.1.1.2 2012/07/22 13:44:39 darrenr
9bc4097aaSchristos */
10bc4097aaSchristos
11bc4097aaSchristos #include "ipf.h"
12bc4097aaSchristos #include <ctype.h>
13bc4097aaSchristos
/*
 * Sizing of the request/response buffer used by load_http().
 *
 * Because the URL can be included twice into the buffer, once as the
 * full path for the "GET" and once as the "Host:", the buffer it is
 * put in needs to be larger than MAX_URL_LEN*2 to make room for the
 * supporting text (the extra 128 bytes).
 *
 * Why not just use snprintf and truncate?  The warning about the URL
 * being too long tells you something is wrong and does not fetch any
 * data - just truncating the URL (with snprintf, etc) and sending that
 * to the server would allow an unknown and unintended action to happen.
 */
#define	MAX_URL_LEN	512
#define	LOAD_BUFSIZE	(MAX_URL_LEN * 2 + 128)
26bc4097aaSchristos
27bc4097aaSchristos /*
28bc4097aaSchristos * Format expected is one addres per line, at the start of each line.
29bc4097aaSchristos */
30bc4097aaSchristos alist_t *
load_http(char * url)31bc4097aaSchristos load_http(char *url)
32bc4097aaSchristos {
33bc4097aaSchristos int fd, len, left, port, endhdr, removed, linenum = 0;
34bc4097aaSchristos char *s, *t, *u, buffer[LOAD_BUFSIZE], *myurl;
35bc4097aaSchristos alist_t *a, *rtop, *rbot;
36fe7112e3Schristos int rem;
37bc4097aaSchristos
38bc4097aaSchristos /*
39bc4097aaSchristos * More than this would just be absurd.
40bc4097aaSchristos */
41bc4097aaSchristos if (strlen(url) > MAX_URL_LEN) {
42bc4097aaSchristos fprintf(stderr, "load_http has a URL > %d bytes?!\n",
43bc4097aaSchristos MAX_URL_LEN);
44bc4097aaSchristos return NULL;
45bc4097aaSchristos }
46bc4097aaSchristos
47bc4097aaSchristos fd = -1;
48bc4097aaSchristos rtop = NULL;
49bc4097aaSchristos rbot = NULL;
50bc4097aaSchristos
51bc4097aaSchristos myurl = strdup(url);
52bc4097aaSchristos if (myurl == NULL)
53bc4097aaSchristos goto done;
54bc4097aaSchristos
55fe7112e3Schristos rem = sizeof(buffer);
56fe7112e3Schristos left = snprintf(buffer, rem, "GET %s HTTP/1.0\r\n", url);
57fe7112e3Schristos if (left < 0 || left > rem)
58fe7112e3Schristos goto done;
59fe7112e3Schristos rem -= left;
60fe7112e3Schristos
61bc4097aaSchristos s = myurl + 7; /* http:// */
62bc4097aaSchristos t = strchr(s, '/');
63bc4097aaSchristos if (t == NULL) {
64bc4097aaSchristos fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
65fe7112e3Schristos goto done;
66bc4097aaSchristos }
67bc4097aaSchristos *t++ = '\0';
68bc4097aaSchristos
69bc4097aaSchristos /*
70bc4097aaSchristos * 10 is the length of 'Host: \r\n\r\n' below.
71bc4097aaSchristos */
72bc4097aaSchristos if (strlen(s) + strlen(buffer) + 10 > sizeof(buffer)) {
73bc4097aaSchristos fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
74bc4097aaSchristos free(myurl);
75bc4097aaSchristos return NULL;
76bc4097aaSchristos }
77bc4097aaSchristos
78bc4097aaSchristos u = strchr(s, '@');
79bc4097aaSchristos if (u != NULL)
80bc4097aaSchristos s = u + 1; /* AUTH */
81bc4097aaSchristos
82fe7112e3Schristos left = snprintf(buffer + left, rem, "Host: %s\r\n\r\n", s);
83fe7112e3Schristos if (left < 0 || left > rem)
84fe7112e3Schristos goto done;
85fe7112e3Schristos rem -= left;
86bc4097aaSchristos
87bc4097aaSchristos u = strchr(s, ':');
88bc4097aaSchristos if (u != NULL) {
89bc4097aaSchristos *u++ = '\0';
90bc4097aaSchristos port = atoi(u);
91bc4097aaSchristos if (port < 0 || port > 65535)
92bc4097aaSchristos goto done;
93bc4097aaSchristos } else {
94bc4097aaSchristos port = 80;
95bc4097aaSchristos }
96bc4097aaSchristos
97bc4097aaSchristos
98bc4097aaSchristos fd = connecttcp(s, port);
99bc4097aaSchristos if (fd == -1)
100bc4097aaSchristos goto done;
101bc4097aaSchristos
102bc4097aaSchristos len = strlen(buffer);
103fe7112e3Schristos if (write(fd, buffer, len) != len)
104bc4097aaSchristos goto done;
105bc4097aaSchristos
106bc4097aaSchristos s = buffer;
107bc4097aaSchristos endhdr = 0;
108bc4097aaSchristos left = sizeof(buffer) - 1;
109bc4097aaSchristos
110bc4097aaSchristos while ((len = read(fd, s, left)) > 0) {
111bc4097aaSchristos s[len] = '\0';
112bc4097aaSchristos left -= len;
113bc4097aaSchristos s += len;
114bc4097aaSchristos
115bc4097aaSchristos if (endhdr >= 0) {
116bc4097aaSchristos if (endhdr == 0) {
117bc4097aaSchristos t = strchr(buffer, ' ');
118bc4097aaSchristos if (t == NULL)
119bc4097aaSchristos continue;
120bc4097aaSchristos t++;
121bc4097aaSchristos if (*t != '2')
122bc4097aaSchristos break;
123bc4097aaSchristos }
124bc4097aaSchristos
125bc4097aaSchristos u = buffer;
126bc4097aaSchristos while ((t = strchr(u, '\r')) != NULL) {
127bc4097aaSchristos if (t == u) {
128bc4097aaSchristos if (*(t + 1) == '\n') {
129bc4097aaSchristos u = t + 2;
130bc4097aaSchristos endhdr = -1;
131bc4097aaSchristos break;
132bc4097aaSchristos } else
133bc4097aaSchristos t++;
134bc4097aaSchristos } else if (*(t + 1) == '\n') {
135bc4097aaSchristos endhdr++;
136bc4097aaSchristos u = t + 2;
137bc4097aaSchristos } else
138bc4097aaSchristos u = t + 1;
139bc4097aaSchristos }
140bc4097aaSchristos if (endhdr >= 0)
141bc4097aaSchristos continue;
142bc4097aaSchristos removed = (u - buffer) + 1;
143bc4097aaSchristos memmove(buffer, u, (sizeof(buffer) - left) - removed);
144bc4097aaSchristos s -= removed;
145bc4097aaSchristos left += removed;
146bc4097aaSchristos }
147bc4097aaSchristos
148bc4097aaSchristos do {
149bc4097aaSchristos t = strchr(buffer, '\n');
150bc4097aaSchristos if (t == NULL)
151bc4097aaSchristos break;
152bc4097aaSchristos
153bc4097aaSchristos linenum++;
154bc4097aaSchristos *t = '\0';
155bc4097aaSchristos
156fe7112e3Schristos for (u = buffer; isdigit((unsigned char)*u) ||
157fe7112e3Schristos (*u == '.'); u++)
158fe7112e3Schristos continue;
159fe7112e3Schristos if (*u == '/') {
160fe7112e3Schristos char *slash;
161fe7112e3Schristos
162fe7112e3Schristos slash = u;
163fe7112e3Schristos u++;
164fe7112e3Schristos while (isdigit((unsigned char)*u))
165fe7112e3Schristos u++;
166fe7112e3Schristos if (!isspace((unsigned char)*u) && *u)
167fe7112e3Schristos u = slash;
168fe7112e3Schristos }
169fe7112e3Schristos
170bc4097aaSchristos /*
171bc4097aaSchristos * Remove comment and continue to the next line if
172bc4097aaSchristos * the comment is at the start of the line.
173bc4097aaSchristos */
174bc4097aaSchristos u = strchr(buffer, '#');
175bc4097aaSchristos if (u != NULL) {
176bc4097aaSchristos *u = '\0';
177fe7112e3Schristos if (u == buffer)
178bc4097aaSchristos continue;
179bc4097aaSchristos }
180bc4097aaSchristos
181bc4097aaSchristos /*
182bc4097aaSchristos * Trim off tailing white spaces, will include \r
183bc4097aaSchristos */
184bc4097aaSchristos for (u = t - 1; (u >= buffer) && ISSPACE(*u); u--)
185bc4097aaSchristos *u = '\0';
186bc4097aaSchristos
187bc4097aaSchristos a = alist_new(AF_UNSPEC, buffer);
188bc4097aaSchristos if (a != NULL) {
189bc4097aaSchristos if (rbot != NULL)
190bc4097aaSchristos rbot->al_next = a;
191bc4097aaSchristos else
192bc4097aaSchristos rtop = a;
193bc4097aaSchristos rbot = a;
194bc4097aaSchristos } else {
195bc4097aaSchristos fprintf(stderr,
196bc4097aaSchristos "%s:%d unrecognised content:%s\n",
197bc4097aaSchristos url, linenum, buffer);
198bc4097aaSchristos }
199bc4097aaSchristos
200bc4097aaSchristos t++;
201bc4097aaSchristos removed = t - buffer;
202bc4097aaSchristos memmove(buffer, t, sizeof(buffer) - left - removed);
203bc4097aaSchristos s -= removed;
204bc4097aaSchristos left += removed;
205bc4097aaSchristos
206bc4097aaSchristos } while (1);
207bc4097aaSchristos }
208bc4097aaSchristos
209bc4097aaSchristos done:
210bc4097aaSchristos if (myurl != NULL)
211bc4097aaSchristos free(myurl);
212bc4097aaSchristos if (fd != -1)
213bc4097aaSchristos close(fd);
214bc4097aaSchristos return rtop;
215bc4097aaSchristos }
216