xref: /openbsd-src/usr.bin/rsync/rules.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 #include <err.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 
6 #include "extern.h"
7 
8 struct rule {
9 	char			*pattern;
10 	enum rule_type		type;
11 #ifdef NOTYET
12 	unsigned int		modifiers;
13 #endif
14 	short			 numseg;
15 	unsigned char		 anchored;
16 	unsigned char		 fileonly;
17 	unsigned char		 nowild;
18 	unsigned char		 onlydir;
19 	unsigned char		 leadingdir;
20 };
21 
/* rule table, grown on demand by get_next_rule() */
static struct rule	*rules = NULL;
static size_t		 numrules = 0;	/* entries in use */
static size_t		 rulesz = 0;	/* entries allocated */
25 
26 /* up to protocol 29 filter rules only support - + ! and no modifiers */
27 
28 const struct command {
29 	enum rule_type		type;
30 	char			sopt;
31 	const char		*lopt;
32 } commands[] = {
33 	{ RULE_EXCLUDE,		'-',	"exclude" },
34 	{ RULE_INCLUDE,		'+',	"include" },
35 	{ RULE_CLEAR,		'!',	"clear" },
36 #ifdef NOTYET
37 	{ RULE_MERGE,		'.',	"merge" },
38 	{ RULE_DIR_MERGE,	':',	"dir-merge" },
39 	{ RULE_SHOW,		'S',	"show" },
40 	{ RULE_HIDE,		'H',	"hide" },
41 	{ RULE_PROTECT,		'P',	"protect" },
42 	{ RULE_RISK,		'R',	"risk" },
43 #endif
44 	{ 0 }
45 };
46 
#ifdef NOTYET
/* bits stored in the modifiers member of a rule */
#define MOD_ABSOLUTE			0x0001
#define MOD_NEGATE			0x0002
#define MOD_CVSEXCLUDE			0x0004
#define MOD_SENDING			0x0008
#define MOD_RECEIVING			0x0010
#define MOD_PERISHABLE			0x0020
#define MOD_XATTR			0x0040
#define MOD_MERGE_EXCLUDE		0x0080
#define MOD_MERGE_INCLUDE		0x0100
#define MOD_MERGE_CVSCOMPAT		0x0200
#define MOD_MERGE_EXCLUDE_FILE		0x0400
#define MOD_MERGE_NO_INHERIT		0x0800
#define MOD_MERGE_WORDSPLIT		0x1000

/*
 * Modifier bits and the character that spells each of them.  The table
 * ends with an all-zero entry.  Note that 'C' is listed twice, once as
 * a plain modifier and once for the '.' and ':' rule types.
 *
 * maybe support absolute and negate
 */
const struct modifier {
	unsigned int		modifier;
	char			sopt;
} modifiers[] = {
	{ MOD_ABSOLUTE,			'/' },
	{ MOD_NEGATE,			'!' },
	{ MOD_CVSEXCLUDE,		'C' },
	{ MOD_SENDING,			's' },
	{ MOD_RECEIVING,		'r' },
	{ MOD_PERISHABLE,		'p' },
	{ MOD_XATTR,			'x' },
	/* for '.' and ':' types */
	{ MOD_MERGE_EXCLUDE,		'-' },
	{ MOD_MERGE_INCLUDE,		'+' },
	{ MOD_MERGE_CVSCOMPAT,		'C' },
	{ MOD_MERGE_EXCLUDE_FILE,	'e' },
	{ MOD_MERGE_NO_INHERIT,		'n' },
	{ MOD_MERGE_WORDSPLIT,		'w' },
	{ 0 }
};
#endif
84 
85 static struct rule *
86 get_next_rule(void)
87 {
88 	struct rule *new;
89 	size_t newsz;
90 
91 	if (++numrules > rulesz) {
92 		if (rulesz == 0)
93 			newsz = 16;
94 		else
95 			newsz = rulesz * 2;
96 
97 		new = recallocarray(rules, rulesz, newsz, sizeof(*rules));
98 		if (new == NULL)
99 			err(ERR_NOMEM, NULL);
100 
101 		rules = new;
102 		rulesz = newsz;
103 	}
104 
105 	return rules + numrules - 1;
106 }
107 
108 static enum rule_type
109 parse_command(const char *command, size_t len)
110 {
111 	const char *mod;
112 	size_t	i;
113 
114 	mod = memchr(command, ',', len);
115 	if (mod != NULL) {
116 		/* XXX modifiers not yet implemented */
117 		return RULE_NONE;
118 	}
119 
120 	for (i = 0; commands[i].type != RULE_NONE; i++) {
121 		if (strncmp(commands[i].lopt, command, len) == 0)
122 			return commands[i].type;
123 		if (len == 1 && commands[i].sopt == *command)
124 			return commands[i].type;
125 	}
126 
127 	return RULE_NONE;
128 }
129 
130 static void
131 parse_pattern(struct rule *r, char *pattern)
132 {
133 	size_t plen;
134 	char *p;
135 	short nseg = 1;
136 
137 	/*
138 	 * check for / at start and end of pattern both are special and
139 	 * can bypass full path matching.
140 	 */
141 	if (*pattern == '/') {
142 		pattern++;
143 		r->anchored = 1;
144 	}
145 	plen = strlen(pattern);
146 	/*
147 	 * check for patterns ending in '/' and '/'+'***' and handle them
148 	 * specially. Because of this and the check above pattern will never
149 	 * start or end with a '/'.
150 	 */
151 	if (plen > 1 && pattern[plen - 1] == '/') {
152 		r->onlydir = 1;
153 		pattern[plen - 1] = '\0';
154 	}
155 	if (plen > 4 && strcmp(pattern + plen - 4, "/***") == 0) {
156 		r->leadingdir = 1;
157 		pattern[plen - 4] = '\0';
158 	}
159 
160 	/* count how many segments the pattern has. */
161 	for (p = pattern; *p != '\0'; p++)
162 		if (*p == '/')
163 			nseg++;
164 	r->numseg = nseg;
165 
166 	/* check if this pattern only matches against the basename */
167 	if (nseg == 1 && !r->anchored)
168 		r->fileonly = 1;
169 
170 	if (strpbrk(pattern, "*?[") == NULL) {
171 		/* no wildchar matching */
172 		r->nowild = 1;
173 	} else {
174 		/* requires wildchar matching */
175 		if (strstr(pattern, "**") != NULL)
176 			r->numseg = -1;
177 	}
178 
179 	r->pattern = strdup(pattern);
180 	if (r->pattern == NULL)
181 		err(ERR_NOMEM, NULL);
182 }
183 
184 int
185 parse_rule(char *line, enum rule_type def)
186 {
187 	enum rule_type type;
188 	struct rule *r;
189 	char *pattern;
190 	size_t len;
191 
192 	switch (*line) {
193 	case '#':
194 	case ';':
195 		/* comment */
196 		return 0;
197 	case '\0':
198 		/* ingore empty lines */
199 		return 0;
200 	default:
201 		len = strcspn(line, " _");
202 		type = parse_command(line, len);
203 		if (type == RULE_NONE) {
204 			if (def == RULE_NONE)
205 				return -1;
206 			type = def;
207 			pattern = line;
208 		} else
209 			pattern = line + len + 1;
210 
211 		if (*pattern == '\0' && type != RULE_CLEAR)
212 			return -1;
213 		if (*pattern != '\0' && type == RULE_CLEAR)
214 			return -1;
215 		break;
216 	}
217 
218 	r = get_next_rule();
219 	r->type = type;
220 	parse_pattern(r, pattern);
221 
222 	return 0;
223 }
224 
225 void
226 parse_file(const char *file, enum rule_type def)
227 {
228 	FILE *fp;
229 	char *line = NULL;
230 	size_t linesize = 0, linenum = 0;
231 	ssize_t linelen;
232 
233 	if ((fp = fopen(file, "r")) == NULL)
234 		err(ERR_SYNTAX, "open: %s", file);
235 
236 	while ((linelen = getline(&line, &linesize, fp)) != -1) {
237 		linenum++;
238 		line[linelen - 1] = '\0';
239 		if (parse_rule(line, def) == -1)
240 			errx(ERR_SYNTAX, "syntax error in %s at entry %zu",
241 			    file, linenum);
242 	}
243 
244 	free(line);
245 	if (ferror(fp))
246 		err(ERR_SYNTAX, "failed to parse file %s", file);
247 	fclose(fp);
248 }
249 
250 static const char *
251 send_command(struct rule *r)
252 {
253 	static char buf[16];
254 	char *b = buf;
255 	char *ep = buf + sizeof(buf);
256 
257 	switch (r->type) {
258 	case RULE_EXCLUDE:
259 		*b++ = '-';
260 		break;
261 	case RULE_INCLUDE:
262 		*b++ = '+';
263 		break;
264 	case RULE_CLEAR:
265 		*b++ = '!';
266 		break;
267 #ifdef NOTYET
268 	case RULE_MERGE:
269 		*b++ = '.';
270 		break;
271 	case RULE_DIR_MERGE:
272 		*b++ = ':';
273 		break;
274 	case RULE_SHOW:
275 		*b++ = 'S';
276 		break;
277 	case RULE_HIDE:
278 		*b++ = 'H';
279 		break;
280 	case RULE_PROTECT:
281 		*b++ = 'P';
282 		break;
283 	case RULE_RISK:
284 		*b++ = 'R';
285 		break;
286 #endif
287 	default:
288 		err(ERR_SYNTAX, "unknown rule type %d", r->type);
289 	}
290 
291 #ifdef NOTYET
292 	for (i = 0; modifiers[i].modifier != 0; i++) {
293 		if (rule->modifiers & modifiers[i].modifier)
294 			*b++ = modifiers[i].sopt;
295 		if (b >= ep - 3)
296 			err(ERR_SYNTAX, "rule modifiers overflow");
297 	}
298 #endif
299 	if (b >= ep - 3)
300 		err(ERR_SYNTAX, "rule prefix overflow");
301 	*b++ = ' ';
302 
303 	/* include the stripped root '/' for anchored patterns */
304 	if (r->anchored)
305 		*b++ = '/';
306 	*b++ = '\0';
307 	return buf;
308 }
309 
310 static const char *
311 postfix_command(struct rule *r)
312 {
313 	static char buf[8];
314 
315 	buf[0] = '\0';
316 	if (r->onlydir)
317 		strlcpy(buf, "/", sizeof(buf));
318 	if (r->leadingdir)
319 		strlcpy(buf, "/***", sizeof(buf));
320 
321 	return buf;
322 }
323 
324 void
325 send_rules(struct sess *sess, int fd)
326 {
327 	const char *cmd;
328 	const char *postfix;
329 	struct rule *r;
330 	size_t cmdlen, len, postlen, i;
331 
332 	for (i = 0; i < numrules; i++) {
333 		r = &rules[i];
334 		cmd = send_command(r);
335 		if (cmd == NULL)
336 			err(ERR_PROTOCOL,
337 			    "rules are incompatible with remote rsync");
338 		postfix = postfix_command(r);
339 		cmdlen = strlen(cmd);
340 		len = strlen(r->pattern);
341 		postlen = strlen(postfix);
342 
343 		if (!io_write_int(sess, fd, cmdlen + len + postlen))
344 			err(ERR_SOCK_IO, "send rules");
345 		if (!io_write_buf(sess, fd, cmd, cmdlen))
346 			err(ERR_SOCK_IO, "send rules");
347 		if (!io_write_buf(sess, fd, r->pattern, len))
348 			err(ERR_SOCK_IO, "send rules");
349 		/* include the '/' stripped by onlydir */
350 		if (postlen > 0)
351 			if (!io_write_buf(sess, fd, postfix, postlen))
352 				err(ERR_SOCK_IO, "send rules");
353 	}
354 
355 	if (!io_write_int(sess, fd, 0))
356 		err(ERR_SOCK_IO, "send rules");
357 }
358 
359 void
360 recv_rules(struct sess *sess, int fd)
361 {
362 	char line[8192];
363 	size_t len;
364 
365 	do {
366 		if (!io_read_size(sess, fd, &len))
367 			err(ERR_SOCK_IO, "receive rules");
368 
369 		if (len == 0)
370 			return;
371 		if (len >= sizeof(line) - 1)
372 			errx(ERR_SOCK_IO, "received rule too long");
373 		if (!io_read_buf(sess, fd, line, len))
374 			err(ERR_SOCK_IO, "receive rules");
375 		line[len] = '\0';
376 		if (parse_rule(line, RULE_NONE) == -1)
377 			errx(ERR_PROTOCOL, "syntax error in received rules");
378 	} while (1);
379 }
380 
381 static inline int
382 rule_matched(struct rule *r)
383 {
384 	/* TODO apply negation once modifiers are added */
385 
386 	if (r->type == RULE_EXCLUDE)
387 		return -1;
388 	else
389 		return 1;
390 }
391 
392 int
393 rules_match(const char *path, int isdir)
394 {
395 	const char *basename, *p = NULL;
396 	struct rule *r;
397 	size_t i;
398 
399 	basename = strrchr(path, '/');
400 	if (basename != NULL)
401 		basename += 1;
402 	else
403 		basename = path;
404 
405 	for (i = 0; i < numrules; i++) {
406 		r = &rules[i];
407 
408 		if (r->onlydir && !isdir)
409 			continue;
410 
411 		if (r->nowild) {
412 			/* fileonly and anchored are mutually exclusive */
413 			if (r->fileonly) {
414 				if (strcmp(basename, r->pattern) == 0)
415 					return rule_matched(r);
416 			} else if (r->anchored) {
417 				/*
418 				 * assumes that neither path nor pattern
419 				 * start with a '/'.
420 				 */
421 				if (strcmp(path, r->pattern) == 0)
422 					return rule_matched(r);
423 			} else if (r->leadingdir) {
424 				size_t plen = strlen(r->pattern);
425 
426 				p = strstr(path, r->pattern);
427 				/*
428 				 * match from start or dir boundary also
429 				 * match to end or to dir boundary
430 				 */
431 				if (p != NULL && (p == path || p[-1] == '/') &&
432 				    (p[plen] == '\0' || p[plen] == '/'))
433 					return rule_matched(r);
434 			} else {
435 				size_t len = strlen(path);
436 				size_t plen = strlen(r->pattern);
437 
438 				if (len >= plen && strcmp(path + len - plen,
439 				    r->pattern) == 0) {
440 					/* match all or start on dir boundary */
441 					if (len == plen ||
442 					    path[len - plen - 1] == '/')
443 						return rule_matched(r);
444 				}
445 			}
446 		} else {
447 			if (r->fileonly) {
448 				p = basename;
449 			} else if (r->anchored || r->numseg == -1) {
450 				/* full path matching */
451 				p = path;
452 			} else {
453 				short nseg = 1;
454 
455 				/* match against the last numseg elements */
456 				for (p = path; *p != '\0'; p++)
457 					if (*p == '/')
458 						nseg++;
459 				if (nseg < r->numseg) {
460 					p = NULL;
461 				} else {
462 					nseg -= r->numseg;
463 					for (p = path; *p != '\0' && nseg > 0;
464 					    p++) {
465 						if (*p == '/')
466 							nseg--;
467 					}
468 				}
469 			}
470 
471 			if (p != NULL) {
472 				if (rmatch(r->pattern, p, r->leadingdir) == 0)
473 					return rule_matched(r);
474 			}
475 		}
476 	}
477 
478 	return 0;
479 }
480